// html2pdf_api/pool.rs
1//! Browser pool with lifecycle management.
2//!
3//! This module provides [`BrowserPool`], the main entry point for managing
4//! a pool of headless Chrome browsers with automatic lifecycle management.
5//!
6//! # Overview
7//!
8//! The browser pool provides:
9//! - **Connection Pooling**: Reuses browser instances to avoid expensive startup costs
10//! - **Health Monitoring**: Background thread continuously checks browser health
11//! - **TTL Management**: Automatically retires old browsers and creates replacements
12//! - **Race-Free Design**: Careful lock ordering prevents deadlocks
13//! - **Graceful Shutdown**: Clean termination of all background tasks
14//! - **RAII Pattern**: Automatic return of browsers to pool via Drop
15//!
16//! # Architecture
17//!
18//! ```text
19//! BrowserPool
20//! ├─ BrowserPoolInner (shared state)
21//! │ ├─ available: Vec<TrackedBrowser> (pooled, ready to use)
22//! │ ├─ active: HashMap<id, TrackedBrowser> (in-use, tracked for health)
23//! │ └─ replacement_tasks: Vec<JoinHandle> (async replacement creators)
24//! └─ keep_alive_handle: JoinHandle (health monitoring thread)
25//! ```
26//!
27//! # Critical Invariants
28//!
29//! 1. **Lock Order**: Always acquire `active` before `available` to prevent deadlocks
30//! 2. **Shutdown Flag**: Check before all expensive operations
31//! 3. **Health Checks**: Never hold locks during I/O operations
32//!
33//! # Example
34//!
35//! ```rust,no_run
36//! use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
37//!
38//! #[tokio::main]
39//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
40//! // Create pool
41//! let mut pool = BrowserPool::builder()
42//! .config(
43//! BrowserPoolConfigBuilder::new()
44//! .max_pool_size(5)
45//! .warmup_count(3)
46//! .build()?
47//! )
48//! .factory(Box::new(ChromeBrowserFactory::with_defaults()))
49//! .build()?;
50//!
51//! // Warmup
52//! pool.warmup().await?;
53//!
54//! // Use browsers
55//! {
56//! let browser = pool.get()?;
57//! let tab = browser.new_tab()?;
58//! // ... do work ...
59//! } // browser returned to pool automatically
60//!
61//! // Shutdown
62//! pool.shutdown_async().await;
63//!
64//! Ok(())
65//! }
66//! ```
67
68use std::collections::HashMap;
69use std::sync::atomic::{AtomicBool, Ordering};
70use std::sync::{Arc, Condvar, Mutex};
71use std::thread::{self, JoinHandle};
72use std::time::Duration;
73
74use tokio::task::JoinHandle as TokioJoinHandle;
75
76use crate::config::BrowserPoolConfig;
77use crate::error::{BrowserPoolError, Result};
78use crate::factory::BrowserFactory;
79use crate::handle::BrowserHandle;
80use crate::stats::PoolStats;
81use crate::tracked::TrackedBrowser;
82
83// ============================================================================
84// BrowserPoolInner
85// ============================================================================
86
/// Internal shared state for the browser pool.
///
/// This struct contains all shared state and is wrapped in Arc for thread-safe
/// sharing between the pool, handles, and background threads.
///
/// # Lock Ordering (CRITICAL)
///
/// Always acquire locks in this order to prevent deadlocks:
/// 1. `active` (browsers currently tracked by the pool)
/// 2. `available` (browsers in pool ready for use)
///
/// Never hold locks during I/O operations or browser creation.
///
/// # Thread Safety
///
/// All fields are protected by appropriate synchronization primitives:
/// - `Mutex` for mutable collections
/// - `AtomicBool` for shutdown flag
/// - `Arc` for shared ownership
pub(crate) struct BrowserPoolInner {
    /// Configuration (immutable after creation).
    config: BrowserPoolConfig,

    /// Browsers available for checkout (not currently in use).
    ///
    /// Protected by Mutex. Browsers are moved from here when checked out
    /// and returned here when released (if pool not full).
    available: Mutex<Vec<TrackedBrowser>>,

    /// All browsers that exist (both pooled and checked out).
    ///
    /// Despite the name, this is a superset of `available`: every live
    /// browser appears here, whether idle or checked out.
    /// Protected by Mutex. Used for health monitoring and lifecycle tracking.
    /// Maps browser ID -> TrackedBrowser for fast lookup.
    active: Mutex<HashMap<u64, TrackedBrowser>>,

    /// Factory for creating new browser instances.
    factory: Box<dyn BrowserFactory>,

    /// Atomic flag indicating shutdown in progress.
    ///
    /// Checked before expensive operations. Once set, no new operations start.
    shutting_down: AtomicBool,

    /// Background tasks creating replacement browsers.
    ///
    /// Tracked so we can abort them during shutdown.
    replacement_tasks: Mutex<Vec<TokioJoinHandle<()>>>,

    /// Handle to tokio runtime for spawning async tasks.
    ///
    /// Captured at creation time to allow spawning from any context,
    /// including sync contexts such as Drop implementations.
    runtime_handle: tokio::runtime::Handle,

    /// Shutdown signaling mechanism for keep-alive thread.
    ///
    /// Tuple of (flag, condvar) allows immediate wake-up on shutdown
    /// instead of waiting for full ping_interval.
    shutdown_signal: Arc<(Mutex<bool>, Condvar)>,
}
146
147impl BrowserPoolInner {
148 /// Create a new browser pool inner state.
149 ///
150 /// # Parameters
151 ///
152 /// * `config` - Validated configuration.
153 /// * `factory` - Browser factory for creating instances.
154 ///
155 /// # Panics
156 ///
157 /// Panics if called outside a tokio runtime context.
158 pub(crate) fn new(config: BrowserPoolConfig, factory: Box<dyn BrowserFactory>) -> Arc<Self> {
159 log::info!(
160 "🚀 Initializing browser pool with capacity {}",
161 config.max_pool_size
162 );
163 log::debug!(
164 "📋 Pool config: warmup={}, TTL={}s, ping_interval={}s",
165 config.warmup_count,
166 config.browser_ttl.as_secs(),
167 config.ping_interval.as_secs()
168 );
169
170 // Capture runtime handle for spawning async tasks
171 // This allows us to spawn from sync contexts (like Drop)
172 let runtime_handle = tokio::runtime::Handle::current();
173
174 Arc::new(Self {
175 config,
176 available: Mutex::new(Vec::new()),
177 active: Mutex::new(HashMap::new()),
178 factory,
179 shutting_down: AtomicBool::new(false),
180 replacement_tasks: Mutex::new(Vec::new()),
181 runtime_handle,
182 shutdown_signal: Arc::new((Mutex::new(false), Condvar::new())),
183 })
184 }
185
186 /// Create a browser directly without using the pool.
187 ///
188 /// Used for:
189 /// - Initial warmup
190 /// - Replacing failed browsers
191 /// - When pool is empty
192 ///
193 /// # Important
194 ///
195 /// Adds the browser to `active` tracking immediately for health monitoring.
196 ///
197 /// # Errors
198 ///
199 /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
200 /// - Returns [`BrowserPoolError::BrowserCreation`] if factory fails.
201 pub(crate) fn create_browser_direct(&self) -> Result<TrackedBrowser> {
202 // Early exit if shutting down (don't waste time creating browsers)
203 if self.shutting_down.load(Ordering::Acquire) {
204 log::debug!("🛑 Skipping browser creation - pool is shutting down");
205 return Err(BrowserPoolError::ShuttingDown);
206 }
207
208 log::debug!("📦 Creating new browser directly via factory...");
209
210 // Factory handles all Chrome launch complexity
211 let browser = self.factory.create()?;
212
213 // Wrap with tracking metadata
214 let tracked = TrackedBrowser::new(browser)?;
215 let id = tracked.id();
216
217 // Add to active tracking immediately for health monitoring
218 // This ensures keep-alive thread will monitor it
219 if let Ok(mut active) = self.active.lock() {
220 active.insert(id, tracked.clone());
221 log::debug!(
222 "📊 Browser {} added to active tracking (total active: {})",
223 id,
224 active.len()
225 );
226 } else {
227 log::warn!(
228 "⚠️ Failed to add browser {} to active tracking (poisoned lock)",
229 id
230 );
231 }
232
233 log::info!("✅ Created new browser with ID {}", id);
234 Ok(tracked)
235 }
236
237 /// Get a browser from pool or create a new one.
238 ///
239 /// # Algorithm
240 ///
241 /// 1. Loop through pooled browsers
242 /// 2. **Grace Period Check**: Check if browser is within 30s of TTL.
243 /// - If near expiry: Skip (drop) it immediately.
244 /// - It remains in `active` tracking so the `keep_alive` thread handles standard retirement/replacement.
245 /// 3. For valid browsers, perform detailed health check (without holding locks)
246 /// 4. If healthy, return it
247 /// 5. If unhealthy, remove from active tracking and try next
248 /// 6. If pool empty or all skipped/unhealthy, create new browser
249 ///
250 /// # Critical: Lock-Free Health Checks
251 ///
252 /// Health checks are performed WITHOUT holding locks to avoid blocking
253 /// other threads. This is why we use a loop pattern instead of iterator.
254 ///
255 /// # Returns
256 ///
257 /// [`BrowserHandle`] that auto-returns browser to pool when dropped.
258 ///
259 /// # Errors
260 ///
261 /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
262 /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
263 pub(crate) fn get_or_create_browser(self: &Arc<Self>) -> Result<BrowserHandle> {
264 log::debug!("🔍 Attempting to get browser from pool...");
265
266 // Try to get from pool - LOOP pattern to avoid holding lock during health checks
267 // This is critical for concurrency: we release the lock between attempts
268 loop {
269 // Acquire lock briefly to pop one browser
270 let tracked_opt = {
271 let mut available = self.available.lock().unwrap_or_else(|poisoned| {
272 log::warn!("Pool available lock poisoned, recovering");
273 poisoned.into_inner()
274 });
275 let popped = available.pop();
276 log::trace!("📊 Pool size after pop: {}", available.len());
277 popped
278 }; // Lock released here - critical for performance
279
280 if let Some(tracked) = tracked_opt {
281 // === LOGIC START: Grace Period Check ===
282 let age = tracked.created_at().elapsed();
283 let ttl = self.config.browser_ttl;
284
285 // Safety margin matching your stagger interval
286 let safety_margin = Duration::from_secs(30);
287
288 // If browser is about to expire, don't use it.
289 if age + safety_margin > ttl {
290 log::debug!(
291 "⏳ Browser {} is near expiry (Age: {}s, Margin: 30s), skipping.",
292 tracked.id(),
293 age.as_secs()
294 );
295
296 // CRITICAL: We do NOT remove/recreate here.
297 // By simply 'continuing', we drop this 'tracked' instance.
298 // 1. It is NOT returned to 'available' (so no user gets it).
299 // 2. It REMAINS in 'active' (so the keep_alive thread still tracks it).
300 // 3. The keep_alive thread will see it expire and handle standard cleanup/replacement.
301 continue;
302 }
303 // === LOGIC END: Grace Period Check ===
304
305 log::debug!("🔍 Testing browser {} from pool for health...", tracked.id());
306
307 // Detailed health check WITHOUT holding any locks
308 // This prevents blocking other threads during I/O
309 match tracked.browser().new_tab() {
310 Ok(tab) => {
311 log::trace!(
312 "✅ Browser {} health check: new_tab() successful",
313 tracked.id()
314 );
315
316 // Test navigation capability (full health check)
317 match tab
318 .navigate_to("data:text/html,<html><body>Health check</body></html>")
319 {
320 Ok(_) => {
321 log::trace!(
322 "✅ Browser {} health check: navigation successful",
323 tracked.id()
324 );
325
326 // Test cleanup capability
327 match tab.close(true) {
328 Ok(_) => {
329 log::debug!(
330 "✅ Browser {} passed full health check - ready for use",
331 tracked.id()
332 );
333
334 // Get pool size for logging (brief lock)
335 let pool_size = {
336 let available = self.available.lock().unwrap_or_else(|poisoned| {
337 log::warn!("Pool available lock poisoned, recovering");
338 poisoned.into_inner()
339 });
340 available.len()
341 };
342
343 log::info!(
344 "♻️ Reusing healthy browser {} from pool (pool size: {})",
345 tracked.id(),
346 pool_size
347 );
348
349 // Return healthy browser wrapped in RAII handle
350 return Ok(BrowserHandle::new(tracked, Arc::clone(self)));
351 }
352 Err(e) => {
353 log::warn!(
354 "❌ Browser {} health check: tab close failed: {}",
355 tracked.id(),
356 e
357 );
358 }
359 }
360 }
361 Err(e) => {
362 log::warn!(
363 "❌ Browser {} health check: navigation failed: {}",
364 tracked.id(),
365 e
366 );
367 }
368 }
369 }
370 Err(e) => {
371 log::warn!(
372 "❌ Browser {} health check: new_tab() failed: {}",
373 tracked.id(),
374 e
375 );
376 }
377 }
378
379 // If we reach here, health check failed
380 // Remove from active tracking (browser is dead)
381 log::warn!(
382 "🗑️ Removing unhealthy browser {} from active tracking",
383 tracked.id()
384 );
385 {
386 let mut active = self.active.lock().unwrap_or_else(|poisoned| {
387 log::warn!("Pool active lock poisoned, recovering");
388 poisoned.into_inner()
389 });
390 active.remove(&tracked.id());
391 log::debug!("📊 Active browsers after removal: {}", active.len());
392 }
393
394 // Continue loop to try next browser in pool
395 log::debug!("🔍 Trying next browser from pool...");
396 } else {
397 // Pool is empty, break to create new browser
398 log::debug!("📥 Pool is empty, will create new browser");
399 break;
400 }
401 }
402
403 // Pool is empty or no healthy browsers found
404 log::info!("📦 Creating new browser (pool was empty or all browsers unhealthy)");
405
406 let tracked = self.create_browser_direct()?;
407
408 log::info!("✅ Returning newly created browser {}", tracked.id());
409 Ok(BrowserHandle::new(tracked, Arc::clone(self)))
410 }
411
412 /// Return a browser to the pool (called by BrowserHandle::drop).
413 ///
414 /// # Critical Lock Ordering
415 ///
416 /// Always acquires locks in order: active -> available.
417 /// Both locks are held together to prevent race conditions.
418 ///
419 /// # Algorithm
420 ///
421 /// 1. Acquire both locks (order: active, then available)
422 /// 2. Verify browser is in active tracking
423 /// 3. Check TTL - if expired, retire and trigger replacement
424 /// 4. If pool has space, add to available pool
425 /// 5. If pool full, remove from active (browser gets dropped)
426 ///
427 /// # Parameters
428 ///
429 /// * `self_arc` - Arc reference to self (needed for spawning async tasks).
430 /// * `tracked` - The browser being returned.
431 pub(crate) fn return_browser(self_arc: &Arc<Self>, tracked: TrackedBrowser) {
432 log::debug!("♻️ Returning browser {} to pool...", tracked.id());
433
434 // Early exit if shutting down (don't waste time managing pool)
435 if self_arc.shutting_down.load(Ordering::Acquire) {
436 log::debug!(
437 "🛑 Pool shutting down, not returning browser {}",
438 tracked.id()
439 );
440 return;
441 }
442
443 // CRITICAL: Always acquire in order: active -> pool
444 // Holding both locks prevents ALL race conditions:
445 // - Prevents concurrent modifications to browser state
446 // - Prevents duplicate returns
447 // - Ensures pool size limits are respected
448 let mut active = self_arc.active.lock().unwrap_or_else(|poisoned| {
449 log::warn!("Pool active lock poisoned, recovering");
450 poisoned.into_inner()
451 });
452 let mut pool = self_arc.available.lock().unwrap_or_else(|poisoned| {
453 log::warn!("Pool available lock poisoned, recovering");
454 poisoned.into_inner()
455 });
456
457 // Verify browser is actually tracked (sanity check)
458 if !active.contains_key(&tracked.id()) {
459 log::warn!(
460 "❌ Browser {} not in active tracking (probably already removed), skipping return",
461 tracked.id()
462 );
463 return;
464 }
465
466 // Check TTL before returning to pool
467 // Expired browsers should be retired to prevent memory leaks
468 if tracked.is_expired(self_arc.config.browser_ttl) {
469 log::info!(
470 "⏰ Browser {} expired (age: {}min, TTL: {}min), retiring instead of returning",
471 tracked.id(),
472 tracked.age_minutes(),
473 self_arc.config.browser_ttl.as_secs() / 60
474 );
475
476 // Remove from active tracking
477 active.remove(&tracked.id());
478 log::debug!("📊 Active browsers after TTL retirement: {}", active.len());
479
480 // Release locks before spawning replacement task
481 drop(active);
482 drop(pool);
483
484 // Trigger async replacement creation (non-blocking)
485 log::debug!("🔍 Triggering replacement browser creation for expired browser");
486 Self::spawn_replacement_creation(Arc::clone(self_arc), 1);
487 return;
488 }
489
490 // Prevent duplicate returns (defensive programming)
491 if pool.iter().any(|b| b.id() == tracked.id()) {
492 log::warn!(
493 "⚠️ Browser {} already in pool (duplicate return attempt), skipping",
494 tracked.id()
495 );
496 return;
497 }
498
499 // Check if pool has space for this browser
500 if pool.len() < self_arc.config.max_pool_size {
501 // Add to pool for reuse
502 pool.push(tracked.clone());
503 log::info!(
504 "♻️ Browser {} returned to pool (pool size: {}/{})",
505 tracked.id(),
506 pool.len(),
507 self_arc.config.max_pool_size
508 );
509 } else {
510 // Pool is full, remove from tracking (browser will be dropped)
511 log::debug!(
512 "️ Pool full ({}/{}), removing browser {} from system",
513 pool.len(),
514 self_arc.config.max_pool_size,
515 tracked.id()
516 );
517 active.remove(&tracked.id());
518 log::debug!("📊 Active browsers after removal: {}", active.len());
519 }
520 }
521
    /// Asynchronously create replacement browsers (internal helper).
    ///
    /// This is the async work function that actually creates browsers.
    /// It's spawned as a tokio task by `spawn_replacement_creation`.
    ///
    /// # Algorithm
    ///
    /// 1. Check shutdown flag before each creation
    /// 2. Check pool space before each creation
    /// 3. Use spawn_blocking for CPU-bound browser creation
    /// 4. Add successful browsers to pool
    /// 5. Log detailed status
    ///
    /// # Parameters
    ///
    /// * `inner` - Arc reference to pool state.
    /// * `count` - Number of browsers to attempt to create.
    async fn spawn_replacement_creation_async(inner: Arc<Self>, count: usize) {
        log::info!(
            "🔍 Starting async replacement creation for {} browsers",
            count
        );

        let mut created_count = 0;
        let mut failed_count = 0;

        for i in 0..count {
            // Check shutdown flag before each expensive operation
            if inner.shutting_down.load(Ordering::Acquire) {
                log::info!(
                    "🛑 Shutdown detected during replacement creation, stopping at {}/{}",
                    i,
                    count
                );
                break;
            }

            // Check if pool has space BEFORE creating (avoid wasted work).
            // The guard lives only inside this block so the lock is not
            // held across the slow spawn_blocking call below.
            let pool_has_space = {
                let pool = inner.available.lock().unwrap_or_else(|poisoned| {
                    log::warn!("Pool available lock poisoned, recovering");
                    poisoned.into_inner()
                });
                let has_space = pool.len() < inner.config.max_pool_size;
                log::trace!(
                    "📊 Pool space check: {}/{} (has space: {})",
                    pool.len(),
                    inner.config.max_pool_size,
                    has_space
                );
                has_space
            };

            if !pool_has_space {
                log::warn!(
                    "⚠️ Pool is full, stopping replacement creation at {}/{}",
                    i,
                    count
                );
                break;
            }

            log::debug!("📦 Creating replacement browser {}/{}", i + 1, count);

            // Use spawn_blocking for CPU-bound browser creation
            // This prevents blocking the async runtime
            let inner_clone = Arc::clone(&inner);
            let result =
                tokio::task::spawn_blocking(move || inner_clone.create_browser_direct()).await;

            match result {
                Ok(Ok(tracked)) => {
                    let id = tracked.id();

                    // Add to pool (with space check to handle race conditions)
                    let mut pool = inner.available.lock().unwrap_or_else(|poisoned| {
                        log::warn!("Pool available lock poisoned, recovering");
                        poisoned.into_inner()
                    });

                    // Double-check space (another thread might have added browsers)
                    if pool.len() < inner.config.max_pool_size {
                        pool.push(tracked);
                        created_count += 1;
                        log::info!(
                            "✅ Created replacement browser {} and added to pool ({}/{})",
                            id,
                            i + 1,
                            count
                        );
                    } else {
                        // NOTE(review): this browser stays in `active` but is never
                        // pooled, so it lingers until the keep-alive thread retires
                        // it at TTL. Confirm this is intended, vs. removing it from
                        // `active` like `return_browser`'s pool-full path does.
                        log::warn!(
                            "⚠️ Pool became full during creation, replacement browser {} kept in active only",
                            id
                        );
                        created_count += 1; // Still count as created (just not pooled)
                    }
                }
                Ok(Err(e)) => {
                    failed_count += 1;
                    log::error!(
                        "❌ Failed to create replacement browser {}/{}: {}",
                        i + 1,
                        count,
                        e
                    );
                }
                Err(e) => {
                    failed_count += 1;
                    log::error!(
                        "❌ Replacement browser {}/{} task panicked: {:?}",
                        i + 1,
                        count,
                        e
                    );
                }
            }
        }

        // Final status report. The two guards are temporaries dropped at the
        // end of each statement, so `available` and `active` are never held
        // simultaneously here (no lock-order concern).
        let pool_size = inner.available.lock().unwrap_or_else(|poisoned| {
            log::warn!("Pool available lock poisoned, recovering");
            poisoned.into_inner()
        }).len();
        let active_size = inner.active.lock().unwrap_or_else(|poisoned| {
            log::warn!("Pool active lock poisoned, recovering");
            poisoned.into_inner()
        }).len();

        log::info!(
            "🏁 Replacement creation completed: {}/{} created, {} failed. Pool: {}, Active: {}",
            created_count,
            count,
            failed_count,
            pool_size,
            active_size
        );
    }
660
661 /// Spawn a background task to create replacement browsers.
662 ///
663 /// This is non-blocking and returns immediately. The actual browser
664 /// creation happens in a tokio task tracked in `replacement_tasks`.
665 ///
666 /// # Why Async
667 ///
668 /// Browser creation is slow (1-3 seconds per browser). Spawning async
669 /// tasks prevents blocking the caller.
670 ///
671 /// # Task Tracking
672 ///
673 /// Tasks are tracked so we can abort them during shutdown.
674 ///
675 /// # Parameters
676 ///
677 /// * `inner` - Arc reference to pool state.
678 /// * `count` - Number of replacement browsers to create.
679 pub(crate) fn spawn_replacement_creation(inner: Arc<Self>, count: usize) {
680 log::info!(
681 "📥 Spawning async task to create {} replacement browsers",
682 count
683 );
684
685 // Clone Arc for moving into async task
686 let inner_for_task = Arc::clone(&inner);
687
688 // Spawn async task on the captured runtime
689 let task_handle = inner.runtime_handle.spawn(async move {
690 Self::spawn_replacement_creation_async(inner_for_task, count).await;
691 });
692
693 // Track task handle for shutdown cleanup
694 if let Ok(mut tasks) = inner.replacement_tasks.lock() {
695 // Clean up finished tasks while we have the lock (housekeeping)
696 let original_count = tasks.len();
697 tasks.retain(|h| !h.is_finished());
698 let cleaned = original_count - tasks.len();
699
700 if cleaned > 0 {
701 log::trace!("🧹 Cleaned up {} finished replacement tasks", cleaned);
702 }
703
704 // Add new task
705 tasks.push(task_handle);
706
707 log::debug!("📋 Now tracking {} active replacement tasks", tasks.len());
708 } else {
709 log::warn!("⚠️ Failed to track replacement task (poisoned lock)");
710 }
711 }
712
    /// Get the pool configuration.
    ///
    /// The configuration is immutable after creation, so this borrow is
    /// always consistent with the values the pool was built with.
    #[inline]
    pub(crate) fn config(&self) -> &BrowserPoolConfig {
        &self.config
    }
718
    /// Check if the pool is shutting down.
    ///
    /// The Acquire load pairs with the Release store in `set_shutting_down`.
    #[inline]
    pub(crate) fn is_shutting_down(&self) -> bool {
        self.shutting_down.load(Ordering::Acquire)
    }
724
    /// Set the shutdown flag.
    ///
    /// The Release store pairs with the Acquire loads performed before
    /// expensive operations (browser creation, pool returns).
    #[inline]
    pub(crate) fn set_shutting_down(&self, value: bool) {
        self.shutting_down.store(value, Ordering::Release);
    }
730
    /// Get the shutdown signal for the keep-alive thread.
    ///
    /// The `(Mutex<bool>, Condvar)` pair lets shutdown wake the keep-alive
    /// thread immediately instead of waiting out a full ping interval.
    #[inline]
    pub(crate) fn shutdown_signal(&self) -> &Arc<(Mutex<bool>, Condvar)> {
        &self.shutdown_signal
    }
736
737 /// Get the available browsers count.
738 pub(crate) fn available_count(&self) -> usize {
739 self.available.lock().map(|g| g.len()).unwrap_or(0)
740 }
741
742 /// Get the active browsers count.
743 pub(crate) fn active_count(&self) -> usize {
744 self.active.lock().map(|g| g.len()).unwrap_or(0)
745 }
746
747 /// Get a snapshot of active browsers for health checking.
748 ///
749 /// Returns a cloned list to avoid holding locks during I/O.
750 pub(crate) fn get_active_browsers_snapshot(&self) -> Vec<(u64, TrackedBrowser)> {
751 let active = self.active.lock().unwrap_or_else(|poisoned| {
752 log::warn!("Pool active lock poisoned, recovering");
753 poisoned.into_inner()
754 });
755 active
756 .iter()
757 .map(|(id, tracked)| (*id, tracked.clone()))
758 .collect()
759 }
760
761 /// Remove a browser from active tracking.
762 pub(crate) fn remove_from_active(&self, id: u64) -> Option<TrackedBrowser> {
763 let mut active = self.active.lock().unwrap_or_else(|poisoned| {
764 log::warn!("Pool active lock poisoned, recovering");
765 poisoned.into_inner()
766 });
767 active.remove(&id)
768 }
769
770 /// Remove browsers from the available pool by ID.
771 pub(crate) fn remove_from_available(&self, ids: &[u64]) {
772 let mut pool = self.available.lock().unwrap_or_else(|poisoned| {
773 log::warn!("Pool available lock poisoned, recovering");
774 poisoned.into_inner()
775 });
776 let original_size = pool.len();
777 pool.retain(|b| !ids.contains(&b.id()));
778 let removed = original_size - pool.len();
779 if removed > 0 {
780 log::debug!("🗑️ Removed {} browsers from available pool", removed);
781 }
782 }
783
784 /// Abort all replacement tasks.
785 pub(crate) fn abort_replacement_tasks(&self) -> usize {
786 if let Ok(mut tasks) = self.replacement_tasks.lock() {
787 let count = tasks.len();
788 for handle in tasks.drain(..) {
789 handle.abort();
790 }
791 count
792 } else {
793 0
794 }
795 }
796}
797
798// ============================================================================
799// BrowserPool
800// ============================================================================
801
802/// Main browser pool with lifecycle management.
803///
804/// This is the public-facing API for the browser pool. It wraps the internal
805/// state and manages the keep-alive thread.
806///
807/// # Overview
808///
809/// `BrowserPool` provides:
810/// - Browser checkout via [`get()`](Self::get)
811/// - Pool warmup via [`warmup()`](Self::warmup)
812/// - Statistics via [`stats()`](Self::stats)
813/// - Graceful shutdown via [`shutdown_async()`](Self::shutdown_async)
814///
815/// # Example
816///
817/// ```rust,no_run
818/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
819/// use std::time::Duration;
820///
821/// #[tokio::main]
822/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
823/// // Create pool
824/// let mut pool = BrowserPool::builder()
825/// .config(
826/// BrowserPoolConfigBuilder::new()
827/// .max_pool_size(5)
828/// .warmup_count(3)
829/// .build()?
830/// )
831/// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
832/// .build()?;
833///
834/// // Warmup
835/// pool.warmup().await?;
836///
837/// // Use browsers
838/// {
839/// let browser = pool.get()?;
840/// let tab = browser.new_tab()?;
841/// // ... do work ...
842/// } // browser returned to pool automatically
843///
844/// // Shutdown
845/// pool.shutdown_async().await;
846///
847/// Ok(())
848/// }
849/// ```
850///
851/// # Thread Safety
852///
853/// `BrowserPool` uses fine-grained internal locks (`Mutex<Vec>`, `Mutex<HashMap>`)
854/// so it is safe to share as `Arc<BrowserPool>` without an outer `Mutex`.
855/// Use [`into_shared()`](Self::into_shared) for convenience.
pub struct BrowserPool {
    /// Shared internal state (also held by handles and background tasks).
    inner: Arc<BrowserPoolInner>,

    /// Handle to keep-alive monitoring thread.
    ///
    /// Option allows taking during shutdown. None means keep-alive disabled.
    keep_alive_handle: Option<JoinHandle<()>>,
}
865
866impl BrowserPool {
    /// Convert pool into a shared `Arc<BrowserPool>` for use in web handlers.
    ///
    /// This is convenient for web frameworks that need shared state.
    /// No outer `Mutex` is needed — the pool uses fine-grained internal locks.
    /// Cloning the resulting `Arc` is a cheap reference-count bump.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let pool = BrowserPool::builder()
    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
    ///     .build()?
    ///     .into_shared();
    ///
    /// // Can now be cloned and shared across handlers
    /// let pool_clone = Arc::clone(&pool);
    /// ```
    pub fn into_shared(self) -> Arc<BrowserPool> {
        log::debug!("🔍 Converting BrowserPool into shared Arc<BrowserPool>");
        Arc::new(self)
    }
887
    /// Create a new builder for constructing a BrowserPool.
    ///
    /// This is the recommended way to create a pool; the builder validates
    /// configuration and wires up the factory before construction.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let pool = BrowserPool::builder()
    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
    ///     .build()?;
    /// ```
    pub fn builder() -> BrowserPoolBuilder {
        BrowserPoolBuilder::new()
    }
902
    /// Get a browser from the pool (or create one if empty).
    ///
    /// Delegates to the shared inner state's checkout logic.
    /// Returns a [`BrowserHandle`] that implements `Deref<Target=Browser>`,
    /// allowing transparent access to browser methods.
    ///
    /// # Automatic Return
    ///
    /// The browser is automatically returned to the pool when the handle
    /// is dropped, even if your code panics (RAII pattern).
    ///
    /// # Errors
    ///
    /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
    /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
    /// - Returns [`BrowserPoolError::HealthCheckFailed`] if all pooled browsers are unhealthy.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let browser = pool.get()?;
    /// let tab = browser.new_tab()?;
    /// tab.navigate_to("https://example.com")?;
    /// // browser returned automatically when it goes out of scope
    /// ```
    pub fn get(&self) -> Result<BrowserHandle> {
        log::trace!("🎯 BrowserPool::get() called");
        self.inner.get_or_create_browser()
    }
931
932 /// Get pool statistics snapshot.
933 ///
934 /// # Returns
935 ///
936 /// [`PoolStats`] containing:
937 /// - `available`: Browsers in pool ready for checkout
938 /// - `active`: All browsers (pooled + checked out)
939 /// - `total`: Currently same as `active` (for future expansion)
940 ///
941 /// # Example
942 ///
943 /// ```rust,ignore
944 /// let stats = pool.stats();
945 /// println!("Available: {}, Active: {}", stats.available, stats.active);
946 /// ```
947 pub fn stats(&self) -> PoolStats {
948 let available = self.inner.available_count();
949 let active = self.inner.active_count();
950
951 log::trace!("📊 Pool stats: available={}, active={}", available, active);
952
953 PoolStats {
954 available,
955 active,
956 total: active,
957 }
958 }
959
    /// Get a reference to the pool configuration.
    ///
    /// Returns the configuration that was used to create this pool.
    /// The configuration is immutable after pool creation.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let pool = BrowserPool::builder()
    ///     .config(
    ///         BrowserPoolConfigBuilder::new()
    ///             .max_pool_size(10)
    ///             .build()?
    ///     )
    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
    ///     .build()?;
    ///
    /// println!("Max pool size: {}", pool.config().max_pool_size);
    /// println!("Browser TTL: {:?}", pool.config().browser_ttl);
    /// ```
    ///
    /// # Use Cases
    ///
    /// - Logging configuration at startup
    /// - Monitoring/metrics collection
    /// - Readiness checks (comparing active count vs max_pool_size)
    /// - Debugging pool behavior
    #[inline]
    pub fn config(&self) -> &BrowserPoolConfig {
        self.inner.config()
    }
991
992 /// Warmup the pool by pre-creating browsers.
993 ///
994 /// This is highly recommended to reduce first-request latency.
995 /// Should be called during application startup.
996 ///
997 /// # Process
998 ///
999 /// 1. Creates `warmup_count` browsers sequentially with staggered timing
1000 /// 2. Tests each browser with navigation
1001 /// 3. Returns all browsers to pool
1002 /// 4. Entire process has timeout (configurable via `warmup_timeout`)
1003 ///
1004 /// # Staggered Creation
1005 ///
1006 /// Browsers are created with a 30-second delay between them to ensure
1007 /// their TTLs are offset. This prevents all browsers from expiring
1008 /// at the same time.
1009 ///
1010 /// # Errors
1011 ///
1012 /// - Returns error if warmup times out.
1013 /// - Returns error if browser creation fails.
1014 ///
1015 /// # Example
1016 ///
1017 /// ```rust,ignore
1018 /// let pool = BrowserPool::builder()
1019 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1020 /// .build()?;
1021 ///
1022 /// // Warmup during startup
1023 /// pool.warmup().await?;
1024 /// ```
1025 pub async fn warmup(&self) -> Result<()> {
1026 let count = self.inner.config().warmup_count;
1027 let warmup_timeout = self.inner.config().warmup_timeout;
1028
1029 log::info!(
1030 "🔥 Starting browser pool warmup with {} instances (timeout: {}s)",
1031 count,
1032 warmup_timeout.as_secs()
1033 );
1034
1035 // Wrap entire warmup in timeout to prevent hanging forever
1036 let warmup_result = tokio::time::timeout(warmup_timeout, self.warmup_internal(count)).await;
1037
1038 match warmup_result {
1039 Ok(Ok(())) => {
1040 let stats = self.stats();
1041 log::info!(
1042 "✅ Warmup completed successfully - Available: {}, Active: {}",
1043 stats.available,
1044 stats.active
1045 );
1046 Ok(())
1047 }
1048 Ok(Err(e)) => {
1049 log::error!("❌ Warmup failed with error: {}", e);
1050 Err(e)
1051 }
1052 Err(_) => {
1053 log::error!("❌ Warmup timed out after {}s", warmup_timeout.as_secs());
1054 Err(BrowserPoolError::Configuration(format!(
1055 "Warmup timed out after {}s",
1056 warmup_timeout.as_secs()
1057 )))
1058 }
1059 }
1060 }
1061
    /// Internal warmup implementation (separated for cleaner timeout wrapping).
    ///
    /// Creates browsers sequentially with a delay between them.
    /// This ensures they don't all reach their TTL (expiration) at the exact same moment.
    ///
    /// # Per-browser lifecycle
    ///
    /// 1. The factory call runs on a blocking thread with a 15s per-browser timeout.
    /// 2. The new browser is validated by opening a tab and navigating to a
    ///    `data:` URL (no network access required).
    /// 3. A [`BrowserHandle`] is retained until the end of warmup; dropping all
    ///    handles at once returns every validated browser to the pool.
    ///
    /// Failures at any step are logged and counted but do not abort the
    /// remaining creations — warmup is best-effort.
    ///
    /// NOTE(review): with a 30s stagger, warming N browsers sleeps for at
    /// least 30*(N-1)s in this function alone — confirm `warmup_timeout` is
    /// configured larger than that, or the outer timeout in
    /// [`warmup`](Self::warmup) will fire before creation finishes.
    async fn warmup_internal(&self, count: usize) -> Result<()> {
        log::debug!("🛠️ Starting internal warmup process for {} browsers", count);

        // STAGGER CONFIGURATION
        // We wait this long between creations to distribute expiration times
        let stagger_interval = Duration::from_secs(30);

        let mut handles = Vec::new();
        let mut created_count = 0;
        let mut failed_count = 0;

        for i in 0..count {
            log::debug!("🌐 Creating startup browser instance {}/{}", i + 1, count);

            // Per-browser timeout (15s per browser is reasonable)
            // This prevents one slow browser from blocking entire warmup
            let browser_result = tokio::time::timeout(
                Duration::from_secs(15),
                tokio::task::spawn_blocking({
                    let inner = Arc::clone(&self.inner);
                    move || inner.create_browser_direct()
                }),
            )
            .await;

            // Three nested Results: timeout fired? -> spawn_blocking task
            // panicked? -> factory itself failed?
            match browser_result {
                Ok(Ok(Ok(tracked))) => {
                    log::debug!(
                        "✅ Browser {} created, performing validation test...",
                        tracked.id()
                    );

                    // Test the browser with actual navigation
                    match tracked.browser().new_tab() {
                        Ok(tab) => {
                            log::trace!("✅ Browser {} test: new_tab() successful", tracked.id());

                            // Navigate to test page
                            let nav_result = tab.navigate_to(
                                "data:text/html,<html><body>Warmup test</body></html>",
                            );
                            // A navigation failure is tolerated: the tab
                            // opened, so the browser is considered usable.
                            if let Err(e) = nav_result {
                                log::warn!(
                                    "⚠️ Browser {} test navigation failed: {}",
                                    tracked.id(),
                                    e
                                );
                            } else {
                                log::trace!(
                                    "✅ Browser {} test: navigation successful",
                                    tracked.id()
                                );
                            }

                            // Clean up test tab
                            let _ = tab.close(true);

                            // Keep handle so browser stays alive
                            handles.push(BrowserHandle::new(tracked, Arc::clone(&self.inner)));

                            created_count += 1;
                            log::info!(
                                "✅ Browser instance {}/{} ready and validated",
                                i + 1,
                                count
                            );
                        }
                        Err(e) => {
                            failed_count += 1;
                            log::error!(
                                "❌ Browser {} validation test failed: {}",
                                tracked.id(),
                                e
                            );

                            // Remove from active tracking since it's broken
                            self.inner.remove_from_active(tracked.id());
                        }
                    }
                }
                Ok(Ok(Err(e))) => {
                    failed_count += 1;
                    log::error!("❌ Failed to create browser {}/{}: {}", i + 1, count, e);
                }
                Ok(Err(e)) => {
                    failed_count += 1;
                    log::error!(
                        "❌ Browser {}/{} creation task panicked: {:?}",
                        i + 1,
                        count,
                        e
                    );
                }
                Err(_) => {
                    failed_count += 1;
                    log::error!(
                        "❌ Browser {}/{} creation timed out (15s limit)",
                        i + 1,
                        count
                    );
                }
            }

            // === STAGGER LOGIC ===
            // If this is not the last browser, wait before creating the next one.
            // This ensures their TTLs are offset by `stagger_interval`.
            // (Only evaluated inside the loop, so `count >= 1` here and the
            // `count - 1` subtraction cannot underflow.)
            if i < count - 1 {
                log::info!(
                    "⏳ Waiting {}s before creating next warmup browser to stagger TTLs...",
                    stagger_interval.as_secs()
                );
                tokio::time::sleep(stagger_interval).await;
            }
        }

        log::info!(
            "📊 Warmup creation phase: {} created, {} failed",
            created_count,
            failed_count
        );

        // Return all browsers to pool by dropping handles
        log::debug!("🔍 Returning {} warmup browsers to pool...", handles.len());
        drop(handles);

        // Small delay to ensure Drop handlers complete
        tokio::time::sleep(Duration::from_millis(300)).await;

        let final_stats = self.stats();
        log::info!(
            "🏁 Warmup internal completed - Pool: {}, Active: {}",
            final_stats.available,
            final_stats.active
        );

        Ok(())
    }
1203
    /// Start the keep-alive monitoring thread.
    ///
    /// This background thread:
    /// - Pings all active browsers periodically
    /// - Removes unresponsive browsers after max_ping_failures
    /// - Retires browsers that exceed TTL
    /// - Spawns replacement browsers as needed
    ///
    /// # Critical Design Notes
    ///
    /// - Uses condvar for immediate shutdown signaling
    /// - Never holds locks during I/O operations
    /// - Uses consistent lock ordering (active -> pool)
    ///
    /// # Parameters
    ///
    /// * `inner` - Arc reference to pool state.
    ///
    /// # Returns
    ///
    /// JoinHandle for the background thread.
    fn start_keep_alive(inner: Arc<BrowserPoolInner>) -> JoinHandle<()> {
        // Snapshot config values once up front; the thread never re-reads them.
        let ping_interval = inner.config().ping_interval;
        let max_failures = inner.config().max_ping_failures;
        let browser_ttl = inner.config().browser_ttl;
        let shutdown_signal = Arc::clone(inner.shutdown_signal());

        log::info!(
            "🚀 Starting keep-alive thread (interval: {}s, max failures: {}, TTL: {}min)",
            ping_interval.as_secs(),
            max_failures,
            browser_ttl.as_secs() / 60
        );

        thread::spawn(move || {
            log::info!("🏁 Keep-alive thread started successfully");

            // Track consecutive failures per browser ID
            let mut failure_counts: HashMap<u64, u32> = HashMap::new();

            loop {
                // Wait for next ping interval OR shutdown signal (whichever comes first)
                // Using condvar instead of sleep allows immediate wake-up on shutdown
                let (lock, cvar) = &*shutdown_signal;
                let wait_result = {
                    let shutdown = lock.lock().unwrap_or_else(|poisoned| {
                        log::warn!("Shutdown lock poisoned, recovering");
                        poisoned.into_inner()
                    });
                    cvar.wait_timeout(shutdown, ping_interval).unwrap_or_else(|poisoned| {
                        log::warn!("Condvar wait_timeout lock poisoned, recovering");
                        poisoned.into_inner()
                    })
                };

                // Copy out of the guard tuple so the mutex guard is dropped
                // before any of the I/O below runs.
                let shutdown_flag = *wait_result.0;
                let timed_out = wait_result.1.timed_out();

                // Check if we were signaled to shutdown
                if shutdown_flag {
                    log::info!("🛑 Keep-alive received shutdown signal via condvar");
                    break;
                }

                // Double-check atomic shutdown flag (belt and suspenders)
                if inner.is_shutting_down() {
                    log::info!("🛑 Keep-alive detected shutdown via atomic flag");
                    break;
                }

                // If spuriously woken (not timeout, not shutdown), continue waiting
                if !timed_out {
                    log::trace!("⏰ Keep-alive spuriously woken, continuing wait...");
                    continue;
                }

                log::trace!("⚡ Keep-alive ping cycle starting...");

                // Collect browsers to ping WITHOUT holding locks
                // This is critical: we clone the list and release the lock
                // before doing any I/O operations
                let browsers_to_ping = inner.get_active_browsers_snapshot();
                log::trace!(
                    "Keep-alive checking {} active browsers",
                    browsers_to_ping.len()
                );

                // Now ping browsers without holding any locks
                let mut to_remove = Vec::new();
                let mut expired_browsers = Vec::new();

                for (id, tracked) in browsers_to_ping {
                    // Check shutdown during ping loop (allows early exit)
                    if inner.is_shutting_down() {
                        log::info!("Shutdown detected during ping loop, exiting immediately");
                        return;
                    }

                    // Check TTL before pinging (no point pinging expired browsers)
                    if tracked.is_expired(browser_ttl) {
                        log::info!(
                            "Browser {} expired (age: {}min, TTL: {}min), marking for retirement",
                            id,
                            tracked.age_minutes(),
                            browser_ttl.as_secs() / 60
                        );
                        expired_browsers.push(id);
                        continue; // Skip ping for expired browsers
                    }

                    // Perform health check (this is I/O, no locks held)
                    use crate::traits::Healthcheck;
                    match tracked.ping() {
                        Ok(_) => {
                            // Reset failure count on success
                            if failure_counts.remove(&id).is_some() {
                                log::debug!("Browser {} ping successful, failure count reset", id);
                            }
                        }
                        Err(e) => {
                            // Only process failures if NOT shutting down
                            // (during shutdown, browsers may legitimately fail)
                            if !inner.is_shutting_down() {
                                let failures = failure_counts.entry(id).or_insert(0);
                                *failures += 1;

                                log::warn!(
                                    "Browser {} ping failed (attempt {}/{}): {}",
                                    id,
                                    failures,
                                    max_failures,
                                    e
                                );

                                // Remove if exceeded max failures
                                if *failures >= max_failures {
                                    log::error!(
                                        "Browser {} exceeded max ping failures ({}), marking for removal",
                                        id,
                                        max_failures
                                    );
                                    to_remove.push(id);
                                }
                            }
                        }
                    }
                }

                // Check shutdown before cleanup (avoid work if shutting down)
                if inner.is_shutting_down() {
                    log::info!("Shutdown detected before cleanup, skipping and exiting");
                    break;
                }

                // Handle TTL retirements first (they need replacement browsers)
                if !expired_browsers.is_empty() {
                    log::info!("Processing {} TTL-expired browsers", expired_browsers.len());
                    Self::handle_browser_retirement(&inner, expired_browsers, &mut failure_counts);
                }

                // Handle failed browsers (remove from tracking and pool)
                if !to_remove.is_empty() {
                    log::warn!("Removing {} failed browsers from pool", to_remove.len());

                    // Track how many were actually removed so we know how many to replace
                    let mut actual_removed_count = 0;

                    // Remove dead browsers from active tracking
                    // (active first, then available — the pool-wide lock order)
                    for id in &to_remove {
                        if inner.remove_from_active(*id).is_some() {
                            actual_removed_count += 1;
                            log::debug!("Removed failed browser {} from active tracking", id);
                        }
                        failure_counts.remove(id);
                    }

                    log::debug!(
                        "Active browsers after failure cleanup: {}",
                        inner.active_count()
                    );

                    // Clean up pool (remove dead browsers)
                    inner.remove_from_available(&to_remove);

                    log::debug!("Pool size after cleanup: {}", inner.available_count());

                    // Trigger replacement for the browsers we just removed
                    if actual_removed_count > 0 {
                        log::info!(
                            "Spawning {} replacement browsers for failed ones",
                            actual_removed_count
                        );
                        BrowserPoolInner::spawn_replacement_creation(
                            Arc::clone(&inner),
                            actual_removed_count,
                        );
                    }
                }

                // Log keep-alive cycle summary
                log::debug!(
                    "Keep-alive cycle complete - Active: {}, Pooled: {}, Tracking {} failure states",
                    inner.active_count(),
                    inner.available_count(),
                    failure_counts.len()
                );
            }

            log::info!("Keep-alive thread exiting cleanly");
        })
    }
1415
1416 /// Handle browser retirement due to TTL expiration.
1417 ///
1418 /// This function:
1419 /// 1. Removes expired browsers from active and pool tracking
1420 /// 2. Spawns async tasks to create replacement browsers
1421 /// 3. Maintains pool target size
1422 ///
1423 /// # Critical Lock Ordering
1424 ///
1425 /// Acquires active -> pool locks together to prevent races.
1426 ///
1427 /// # Parameters
1428 ///
1429 /// * `inner` - Arc reference to pool state.
1430 /// * `expired_ids` - List of browser IDs that have exceeded TTL.
1431 /// * `failure_counts` - Mutable map of failure counts (updated to remove retired browsers).
1432 fn handle_browser_retirement(
1433 inner: &Arc<BrowserPoolInner>,
1434 expired_ids: Vec<u64>,
1435 failure_counts: &mut HashMap<u64, u32>,
1436 ) {
1437 log::info!(
1438 "Retiring {} expired browsers (TTL enforcement)",
1439 expired_ids.len()
1440 );
1441
1442 // Remove expired browsers from active tracking
1443 let mut retired_count = 0;
1444 for id in &expired_ids {
1445 if inner.remove_from_active(*id).is_some() {
1446 retired_count += 1;
1447 log::debug!("Removed expired browser {} from active tracking", id);
1448 }
1449 // Clean up failure tracking
1450 failure_counts.remove(id);
1451 }
1452
1453 // Remove from pool as well
1454 inner.remove_from_available(&expired_ids);
1455
1456 log::debug!(
1457 "After retirement - Active: {}, Pooled: {}",
1458 inner.active_count(),
1459 inner.available_count()
1460 );
1461
1462 // Create replacement browsers to maintain target count
1463 if retired_count > 0 {
1464 log::info!(
1465 "Spawning {} replacement browsers for retired ones",
1466 retired_count
1467 );
1468 BrowserPoolInner::spawn_replacement_creation(Arc::clone(inner), retired_count);
1469 } else {
1470 log::debug!("No browsers were actually retired (already removed)");
1471 }
1472 }
1473
    /// Asynchronously shutdown the pool (recommended method).
    ///
    /// This is the preferred shutdown method as it can properly await
    /// async task cancellation. Should be called during application shutdown.
    ///
    /// # Shutdown Process
    ///
    /// 1. Set atomic shutdown flag (stops new operations)
    /// 2. Signal condvar to wake keep-alive thread immediately
    /// 3. Wait for keep-alive thread to exit (with timeout)
    /// 4. Abort all replacement creation tasks
    /// 5. Wait briefly for cleanup
    /// 6. Log final statistics
    ///
    /// # Timeout
    ///
    /// Keep-alive thread is given 5 seconds to exit gracefully.
    /// If it doesn't exit, we log an error but continue shutdown.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let mut pool = /* ... */;
    ///
    /// // During application shutdown
    /// pool.shutdown_async().await;
    /// ```
    pub async fn shutdown_async(&mut self) {
        log::info!("Shutting down browser pool (async mode)...");

        // Step 1: Set shutdown flag (prevents new operations)
        self.inner.set_shutting_down(true);
        log::debug!("Shutdown flag set");

        // Step 2: Signal condvar to wake keep-alive thread immediately
        // This is critical - without this, keep-alive waits for full ping_interval
        {
            let (lock, cvar) = &**self.inner.shutdown_signal();
            let mut shutdown = lock.lock().unwrap_or_else(|poisoned| {
                log::warn!("Shutdown lock poisoned, recovering");
                poisoned.into_inner()
            });
            *shutdown = true;
            cvar.notify_all();
            log::debug!("Shutdown signal sent to keep-alive thread");
        } // Lock released here

        // Step 3: Wait for keep-alive thread to exit
        // `take()` also makes shutdown idempotent: a second call finds None.
        if let Some(handle) = self.keep_alive_handle.take() {
            log::debug!("Waiting for keep-alive thread to exit...");

            // Wrap thread join in spawn_blocking to make it async-friendly
            let join_task = tokio::task::spawn_blocking(move || handle.join());

            // Give it 5 seconds to exit gracefully
            // Outcome nesting: timeout -> spawn_blocking join -> thread join.
            match tokio::time::timeout(Duration::from_secs(5), join_task).await {
                Ok(Ok(Ok(_))) => {
                    log::info!("Keep-alive thread stopped cleanly");
                }
                Ok(Ok(Err(_))) => {
                    log::error!("Keep-alive thread panicked during shutdown");
                }
                Ok(Err(_)) => {
                    log::error!("Keep-alive join task panicked");
                }
                Err(_) => {
                    // The thread stays detached; we continue so shutdown
                    // cannot hang the caller indefinitely.
                    log::error!("Keep-alive thread didn't exit within 5s timeout");
                }
            }
        } else {
            log::debug!("No keep-alive thread to stop (was disabled or already stopped)");
        }

        // Step 4: Abort all replacement creation tasks
        log::info!("Aborting replacement creation tasks...");
        let aborted_count = self.inner.abort_replacement_tasks();
        if aborted_count > 0 {
            log::info!("Aborted {} replacement tasks", aborted_count);
        } else {
            log::debug!("No replacement tasks to abort");
        }

        // Step 5: Small delay to let aborted tasks clean up
        tokio::time::sleep(Duration::from_millis(100)).await;

        // Step 6: Log final statistics
        let stats = self.stats();
        log::info!(
            "Async shutdown complete - Available: {}, Active: {}, Total: {}",
            stats.available,
            stats.active,
            stats.total
        );
    }
1568
    /// Synchronously shutdown the pool (fallback method).
    ///
    /// This is a simplified shutdown for use in Drop or non-async contexts.
    /// Prefer [`shutdown_async()`](Self::shutdown_async) when possible for cleaner task cancellation.
    ///
    /// # Note
    ///
    /// This method doesn't wait for replacement tasks to finish since
    /// there's no async runtime available. Tasks are aborted but may not
    /// have cleaned up yet.
    ///
    /// Safe to call more than once: a repeat call finds the keep-alive
    /// handle already taken and only re-logs statistics.
    pub fn shutdown(&mut self) {
        log::debug!("Calling synchronous shutdown...");
        // Thin public wrapper around the shared sync implementation.
        self.shutdown_sync();
    }
1583
1584 /// Internal synchronous shutdown implementation.
1585 fn shutdown_sync(&mut self) {
1586 log::info!("Shutting down browser pool (sync mode)...");
1587
1588 // Set shutdown flag
1589 self.inner.set_shutting_down(true);
1590 log::debug!("Shutdown flag set");
1591
1592 // Signal condvar (same as async version)
1593 {
1594 let (lock, cvar) = &**self.inner.shutdown_signal();
1595 let mut shutdown = lock.lock().unwrap_or_else(|poisoned| {
1596 log::warn!("Shutdown lock poisoned, recovering");
1597 poisoned.into_inner()
1598 });
1599 *shutdown = true;
1600 cvar.notify_all();
1601 log::debug!("Shutdown signal sent");
1602 }
1603
1604 // Wait for keep-alive thread
1605 if let Some(handle) = self.keep_alive_handle.take() {
1606 log::debug!("Joining keep-alive thread (sync)...");
1607
1608 match handle.join() {
1609 Ok(_) => log::info!("Keep-alive thread stopped"),
1610 Err(_) => log::error!("Keep-alive thread panicked"),
1611 }
1612 }
1613
1614 // Abort replacement tasks (best effort - they won't make progress without runtime)
1615 let aborted_count = self.inner.abort_replacement_tasks();
1616 if aborted_count > 0 {
1617 log::debug!("Aborted {} replacement tasks (sync mode)", aborted_count);
1618 }
1619
1620 let stats = self.stats();
1621 log::info!(
1622 "Sync shutdown complete - Available: {}, Active: {}",
1623 stats.available,
1624 stats.active
1625 );
1626 }
1627
    /// Get a reference to the inner pool state.
    ///
    /// This is primarily for internal use and testing.
    ///
    /// Hidden from rustdoc because `BrowserPoolInner` is an implementation
    /// detail; external code should use the public pool API instead.
    #[doc(hidden)]
    #[allow(dead_code)]
    pub(crate) fn inner(&self) -> &Arc<BrowserPoolInner> {
        &self.inner
    }
1636}
1637
1638impl Drop for BrowserPool {
1639 /// Automatic cleanup when pool is dropped.
1640 ///
1641 /// This ensures resources are released even if shutdown wasn't called explicitly.
1642 /// Uses sync shutdown since Drop can't be async.
1643 fn drop(&mut self) {
1644 log::debug!("🛑 BrowserPool Drop triggered - running cleanup");
1645
1646 // Only shutdown if not already done
1647 if !self.inner.is_shutting_down() {
1648 log::warn!("⚠ BrowserPool dropped without explicit shutdown - cleaning up");
1649 self.shutdown();
1650 } else {
1651 log::debug!(" Pool already shutdown, Drop is no-op");
1652 }
1653 }
1654}
1655
1656// ============================================================================
1657// BrowserPoolBuilder
1658// ============================================================================
1659
/// Builder for constructing a [`BrowserPool`] with validation.
///
/// This is the recommended way to create a pool as it validates
/// configuration and provides sensible defaults.
///
/// # Example
///
/// ```rust,ignore
/// use std::time::Duration;
/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
///
/// let pool = BrowserPool::builder()
///     .config(
///         BrowserPoolConfigBuilder::new()
///             .max_pool_size(10)
///             .warmup_count(5)
///             .browser_ttl(Duration::from_secs(7200))
///             .build()?
///     )
///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
///     .enable_keep_alive(true)
///     .build()?;
/// ```
pub struct BrowserPoolBuilder {
    /// Optional configuration; `build()` falls back to
    /// `BrowserPoolConfig::default()` when this is `None`.
    config: Option<BrowserPoolConfig>,

    /// Browser factory (required — `build()` errors when this is `None`).
    factory: Option<Box<dyn BrowserFactory>>,

    /// Whether to start the keep-alive monitoring thread (default: true).
    enable_keep_alive: bool,
}
1693
1694impl BrowserPoolBuilder {
1695 /// Create a new builder with defaults.
1696 ///
1697 /// # Example
1698 ///
1699 /// ```rust,ignore
1700 /// let builder = BrowserPoolBuilder::new();
1701 /// ```
1702 pub fn new() -> Self {
1703 Self {
1704 config: None,
1705 factory: None,
1706 enable_keep_alive: true,
1707 }
1708 }
1709
1710 /// Set custom configuration.
1711 ///
1712 /// If not called, uses [`BrowserPoolConfig::default()`].
1713 ///
1714 /// # Parameters
1715 ///
1716 /// * `config` - Validated configuration from [`crate::BrowserPoolConfigBuilder`].
1717 ///
1718 /// # Example
1719 ///
1720 /// ```rust,ignore
1721 /// let config = BrowserPoolConfigBuilder::new()
1722 /// .max_pool_size(10)
1723 /// .build()?;
1724 ///
1725 /// let pool = BrowserPool::builder()
1726 /// .config(config)
1727 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1728 /// .build()?;
1729 /// ```
1730 pub fn config(mut self, config: BrowserPoolConfig) -> Self {
1731 self.config = Some(config);
1732 self
1733 }
1734
1735 /// Set browser factory (required).
1736 ///
1737 /// The factory is responsible for creating browser instances.
1738 /// Use [`ChromeBrowserFactory`](crate::ChromeBrowserFactory) for Chrome/Chromium browsers.
1739 ///
1740 /// # Parameters
1741 ///
1742 /// * `factory` - A boxed [`BrowserFactory`] implementation.
1743 ///
1744 /// # Example
1745 ///
1746 /// ```rust,ignore
1747 /// let pool = BrowserPool::builder()
1748 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1749 /// .build()?;
1750 /// ```
1751 pub fn factory(mut self, factory: Box<dyn BrowserFactory>) -> Self {
1752 self.factory = Some(factory);
1753 self
1754 }
1755
1756 /// Enable or disable keep-alive thread.
1757 ///
1758 /// Keep-alive should be disabled only for testing.
1759 /// Production use should always have it enabled.
1760 ///
1761 /// # Parameters
1762 ///
1763 /// * `enable` - Whether to enable the keep-alive thread.
1764 ///
1765 /// # Example
1766 ///
1767 /// ```rust,ignore
1768 /// // Disable for tests
1769 /// let pool = BrowserPool::builder()
1770 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1771 /// .enable_keep_alive(false)
1772 /// .build()?;
1773 /// ```
1774 pub fn enable_keep_alive(mut self, enable: bool) -> Self {
1775 self.enable_keep_alive = enable;
1776 self
1777 }
1778
1779 /// Build the browser pool.
1780 ///
1781 /// # Errors
1782 ///
1783 /// Returns [`BrowserPoolError::Configuration`] if factory is not provided.
1784 ///
1785 /// # Panics
1786 ///
1787 /// Panics if called outside a tokio runtime context.
1788 ///
1789 /// # Example
1790 ///
1791 /// ```rust,ignore
1792 /// let pool = BrowserPool::builder()
1793 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1794 /// .build()?;
1795 /// ```
1796 pub fn build(self) -> Result<BrowserPool> {
1797 let config = self.config.unwrap_or_default();
1798 let factory = self.factory.ok_or_else(|| {
1799 BrowserPoolError::Configuration("No browser factory provided".to_string())
1800 })?;
1801
1802 log::info!("📦 Building browser pool with config: {:?}", config);
1803
1804 // Create inner state
1805 let inner = BrowserPoolInner::new(config, factory);
1806
1807 // Start keep-alive thread if enabled
1808 let keep_alive_handle = if self.enable_keep_alive {
1809 log::info!("🚀 Starting keep-alive monitoring thread");
1810 Some(BrowserPool::start_keep_alive(Arc::clone(&inner)))
1811 } else {
1812 log::warn!("⚠️ Keep-alive thread disabled (should only be used for testing)");
1813 None
1814 };
1815
1816 log::info!("✅ Browser pool built successfully");
1817
1818 Ok(BrowserPool {
1819 inner,
1820 keep_alive_handle,
1821 })
1822 }
1823}
1824
1825impl Default for BrowserPoolBuilder {
1826 fn default() -> Self {
1827 Self::new()
1828 }
1829}
1830
1831// ============================================================================
1832// Environment Initialization (feature-gated)
1833// ============================================================================
1834
1835/// Initialize browser pool from environment variables.
1836///
1837/// This is a convenience function for common initialization patterns.
1838/// It reads configuration from environment variables with sensible defaults.
1839///
1840/// # Feature Flag
1841///
1842/// This function is only available when the `env-config` feature is enabled.
1843///
1844/// # Environment Variables
1845///
1846/// - `BROWSER_POOL_SIZE`: Maximum pool size (default: 5)
1847/// - `BROWSER_WARMUP_COUNT`: Warmup browser count (default: 3)
1848/// - `BROWSER_TTL_SECONDS`: Browser TTL in seconds (default: 3600)
1849/// - `BROWSER_WARMUP_TIMEOUT_SECONDS`: Warmup timeout (default: 60)
1850/// - `CHROME_PATH`: Custom Chrome binary path (optional)
1851///
1852/// # Returns
1853///
1854/// `Arc<BrowserPool>` ready for use in web handlers.
1855///
1856/// # Errors
1857///
1858/// - Returns error if configuration is invalid.
1859/// - Returns error if warmup fails.
1860///
1861/// # Example
1862///
1863/// ```rust,ignore
1864/// #[tokio::main]
1865/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
1866/// env_logger::init();
1867///
1868/// let pool = init_browser_pool().await?;
1869///
1870/// // Use pool in handlers...
1871///
1872/// Ok(())
1873/// }
1874/// ```
#[cfg(feature = "env-config")]
pub async fn init_browser_pool() -> Result<Arc<BrowserPool>> {
    use crate::config::env::{chrome_path_from_env, from_env};
    use crate::factory::ChromeBrowserFactory;

    log::info!("Initializing browser pool from environment...");

    // Resolve configuration and the optional Chrome binary path up front.
    let config = from_env()?;
    let chrome_path = chrome_path_from_env();

    // Echo the effective settings so startup logs show exactly what applied.
    log::info!("Pool configuration from environment:");
    log::info!(" - Max pool size: {}", config.max_pool_size);
    log::info!(" - Warmup count: {}", config.warmup_count);
    log::info!(
        " - Browser TTL: {}s ({}min)",
        config.browser_ttl.as_secs(),
        config.browser_ttl.as_secs() / 60
    );
    log::info!(" - Warmup timeout: {}s", config.warmup_timeout.as_secs());
    log::info!(
        " - Chrome path: {}",
        chrome_path.as_deref().unwrap_or("auto-detect")
    );

    // Pick the factory flavour based on whether a custom binary was given.
    let factory: Box<dyn BrowserFactory> = if let Some(path) = chrome_path {
        log::info!("Using custom Chrome path: {}", path);
        Box::new(ChromeBrowserFactory::with_path(path))
    } else {
        log::info!("Using auto-detected Chrome browser");
        Box::new(ChromeBrowserFactory::with_defaults())
    };

    // Build the pool; failures are logged before being propagated.
    log::debug!("Building browser pool...");
    let pool = match BrowserPool::builder()
        .config(config.clone())
        .factory(factory)
        .enable_keep_alive(true)
        .build()
    {
        Ok(pool) => pool,
        Err(e) => {
            log::error!("❌ Failed to create browser pool: {}", e);
            return Err(e);
        }
    };

    log::info!("✅ Browser pool created successfully");

    // Pre-create browsers so the first request doesn't pay startup cost.
    log::info!(
        "Warming up browser pool with {} instances...",
        config.warmup_count
    );
    if let Err(e) = pool.warmup().await {
        log::error!("❌ Failed to warmup pool: {}", e);
        return Err(e);
    }

    let stats = pool.stats();
    log::info!(
        "✅ Browser pool ready - Available: {}, Active: {}, Total: {}",
        stats.available,
        stats.active,
        stats.total
    );

    Ok(pool.into_shared())
}
1948
1949// ============================================================================
1950// Unit Tests
1951// ============================================================================
1952
#[cfg(test)]
mod tests {
    use super::*;

    /// Building without a factory must fail: the pool cannot create browser
    /// instances on its own, so the builder rejects the configuration with a
    /// descriptive error.
    #[test]
    fn test_pool_builder_missing_factory() {
        // We need a tokio runtime for the builder
        let runtime = tokio::runtime::Runtime::new().unwrap();

        runtime.block_on(async {
            let config = crate::config::BrowserPoolConfigBuilder::new()
                .max_pool_size(3)
                .build()
                .unwrap();

            // Intentionally missing factory
            let result = BrowserPool::builder().config(config).build();

            assert!(result.is_err(), "Build should fail without factory");

            if let Err(BrowserPoolError::Configuration(msg)) = result {
                assert!(
                    msg.contains("No browser factory provided"),
                    "Expected factory error, got: {}",
                    msg
                );
            } else {
                panic!("Expected Configuration error for missing factory");
            }
        });
    }

    /// `Default` must agree with `new()`: no config, no factory, keep-alive on.
    #[test]
    fn test_builder_default() {
        let builder: BrowserPoolBuilder = Default::default();
        assert!(builder.config.is_none());
        assert!(builder.factory.is_none());
        assert!(builder.enable_keep_alive);
    }

    /// The keep-alive thread can be opted out of (intended for tests only).
    #[test]
    fn test_builder_disable_keep_alive() {
        let builder = BrowserPoolBuilder::new().enable_keep_alive(false);
        assert!(!builder.enable_keep_alive);
    }
}