html2pdf_api/pool.rs
1//! Browser pool with lifecycle management.
2//!
3//! This module provides [`BrowserPool`], the main entry point for managing
4//! a pool of headless Chrome browsers with automatic lifecycle management.
5//!
6//! # Overview
7//!
8//! The browser pool provides:
9//! - **Connection Pooling**: Reuses browser instances to avoid expensive startup costs
10//! - **Health Monitoring**: Background thread continuously checks browser health
11//! - **TTL Management**: Automatically retires old browsers and creates replacements
12//! - **Race-Free Design**: Careful lock ordering prevents deadlocks
13//! - **Graceful Shutdown**: Clean termination of all background tasks
14//! - **RAII Pattern**: Automatic return of browsers to pool via Drop
15//!
16//! # Architecture
17//!
18//! ```text
19//! BrowserPool
20//! ├─ BrowserPoolInner (shared state)
21//! │ ├─ available: Vec<TrackedBrowser> (pooled, ready to use)
22//! │ ├─ active: HashMap<id, TrackedBrowser> (in-use, tracked for health)
23//! │ └─ replacement_tasks: Vec<JoinHandle> (async replacement creators)
24//! └─ keep_alive_handle: JoinHandle (health monitoring thread)
25//! ```
26//!
27//! # Critical Invariants
28//!
29//! 1. **Lock Order**: Always acquire `active` before `available` to prevent deadlocks
30//! 2. **Shutdown Flag**: Check before all expensive operations
31//! 3. **Health Checks**: Never hold locks during I/O operations
32//!
33//! # Example
34//!
35//! ```rust,no_run
36//! use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
37//!
38//! #[tokio::main]
39//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
40//! // Create pool
41//! let mut pool = BrowserPool::builder()
42//! .config(
43//! BrowserPoolConfigBuilder::new()
44//! .max_pool_size(5)
45//! .warmup_count(3)
46//! .build()?
47//! )
48//! .factory(Box::new(ChromeBrowserFactory::with_defaults()))
49//! .build()?;
50//!
51//! // Warmup
52//! pool.warmup().await?;
53//!
54//! // Use browsers
55//! {
56//! let browser = pool.get()?;
57//! let tab = browser.new_tab()?;
58//! // ... do work ...
59//! } // browser returned to pool automatically
60//!
61//! // Shutdown
62//! pool.shutdown_async().await;
63//!
64//! Ok(())
65//! }
66//! ```
67
68use std::collections::HashMap;
69use std::sync::atomic::{AtomicBool, Ordering};
70use std::sync::{Arc, Condvar, Mutex};
71use std::thread::{self, JoinHandle};
72use std::time::Duration;
73
74use tokio::task::JoinHandle as TokioJoinHandle;
75
76use crate::config::BrowserPoolConfig;
77use crate::error::{BrowserPoolError, Result};
78use crate::factory::BrowserFactory;
79use crate::handle::BrowserHandle;
80use crate::stats::PoolStats;
81use crate::tracked::TrackedBrowser;
82
83// ============================================================================
84// BrowserPoolInner
85// ============================================================================
86
/// Internal shared state for the browser pool.
///
/// Wrapped in an `Arc` and shared between the public pool, every
/// outstanding handle, and the background threads.
///
/// # Lock Ordering (CRITICAL)
///
/// Always acquire locks in this order to prevent deadlocks:
/// 1. `active` (browsers currently in use)
/// 2. `available` (browsers in pool ready for use)
///
/// Never hold locks during I/O operations or browser creation.
///
/// # Thread Safety
///
/// All fields are protected by appropriate synchronization primitives:
/// - `Mutex` for mutable collections
/// - `AtomicBool` for shutdown flag
/// - `Arc` for shared ownership
pub(crate) struct BrowserPoolInner {
    /// Configuration (immutable after creation).
    config: BrowserPoolConfig,

    /// Browsers available for checkout (not currently in use).
    ///
    /// Protected by Mutex. Browsers are popped from here when checked out
    /// and pushed back when released (if the pool is not full and the
    /// browser is still within its TTL).
    available: Mutex<Vec<TrackedBrowser>>,

    /// All live browsers (both pooled and checked out), keyed by browser ID.
    ///
    /// Protected by Mutex. Used by the keep-alive thread for health
    /// monitoring and TTL lifecycle tracking.
    active: Mutex<HashMap<u64, TrackedBrowser>>,

    /// Factory for creating new browser instances.
    factory: Box<dyn BrowserFactory>,

    /// Atomic flag indicating shutdown in progress.
    ///
    /// Checked before expensive operations. Once set, no new operations start.
    shutting_down: AtomicBool,

    /// Background tasks creating replacement browsers.
    ///
    /// Tracked so we can abort them during shutdown.
    replacement_tasks: Mutex<Vec<TokioJoinHandle<()>>>,

    /// Handle to the tokio runtime for spawning async tasks.
    ///
    /// Captured at creation time so tasks can be spawned even from
    /// synchronous contexts (e.g. `Drop` implementations).
    runtime_handle: tokio::runtime::Handle,

    /// Shutdown signaling mechanism for the keep-alive thread.
    ///
    /// The (flag, condvar) pair lets shutdown wake the thread immediately
    /// instead of waiting out a full `ping_interval`.
    shutdown_signal: Arc<(Mutex<bool>, Condvar)>,
}
146
147impl BrowserPoolInner {
148 /// Create a new browser pool inner state.
149 ///
150 /// # Parameters
151 ///
152 /// * `config` - Validated configuration.
153 /// * `factory` - Browser factory for creating instances.
154 ///
155 /// # Panics
156 ///
157 /// Panics if called outside a tokio runtime context.
158 pub(crate) fn new(config: BrowserPoolConfig, factory: Box<dyn BrowserFactory>) -> Arc<Self> {
159 log::info!(
160 " Initializing browser pool with capacity {}",
161 config.max_pool_size
162 );
163 log::debug!(
164 " Pool config: warmup={}, TTL={}s, ping_interval={}s",
165 config.warmup_count,
166 config.browser_ttl.as_secs(),
167 config.ping_interval.as_secs()
168 );
169
170 // Capture runtime handle for spawning async tasks
171 // This allows us to spawn from sync contexts (like Drop)
172 let runtime_handle = tokio::runtime::Handle::current();
173
174 Arc::new(Self {
175 config,
176 available: Mutex::new(Vec::new()),
177 active: Mutex::new(HashMap::new()),
178 factory,
179 shutting_down: AtomicBool::new(false),
180 replacement_tasks: Mutex::new(Vec::new()),
181 runtime_handle,
182 shutdown_signal: Arc::new((Mutex::new(false), Condvar::new())),
183 })
184 }
185
186 /// Create a browser directly without using the pool.
187 ///
188 /// Used for:
189 /// - Initial warmup
190 /// - Replacing failed browsers
191 /// - When pool is empty
192 ///
193 /// # Important
194 ///
195 /// Adds the browser to `active` tracking immediately for health monitoring.
196 ///
197 /// # Errors
198 ///
199 /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
200 /// - Returns [`BrowserPoolError::BrowserCreation`] if factory fails.
201 pub(crate) fn create_browser_direct(&self) -> Result<TrackedBrowser> {
202 // Early exit if shutting down (don't waste time creating browsers)
203 if self.shutting_down.load(Ordering::Acquire) {
204 log::debug!(" Skipping browser creation - pool is shutting down");
205 return Err(BrowserPoolError::ShuttingDown);
206 }
207
208 log::debug!("️ Creating new browser directly via factory...");
209
210 // Factory handles all Chrome launch complexity
211 let browser = self.factory.create()?;
212
213 // Wrap with tracking metadata
214 let tracked = TrackedBrowser::new(browser)?;
215 let id = tracked.id();
216
217 // Add to active tracking immediately for health monitoring
218 // This ensures keep-alive thread will monitor it
219 if let Ok(mut active) = self.active.lock() {
220 active.insert(id, tracked.clone());
221 log::debug!(
222 " Browser {} added to active tracking (total active: {})",
223 id,
224 active.len()
225 );
226 } else {
227 log::warn!(
228 "⚠️ Failed to add browser {} to active tracking (poisoned lock)",
229 id
230 );
231 }
232
233 log::info!("✅ Created new browser with ID {}", id);
234 Ok(tracked)
235 }
236
237 /// Get a browser from pool or create a new one.
238 ///
239 /// # Algorithm
240 ///
241 /// 1. Loop through pooled browsers
242 /// 2. **Grace Period Check**: Check if browser is within 30s of TTL.
243 /// - If near expiry: Skip (drop) it immediately.
244 /// - It remains in `active` tracking so the `keep_alive` thread handles standard retirement/replacement.
245 /// 3. For valid browsers, perform detailed health check (without holding locks)
246 /// 4. If healthy, return it
247 /// 5. If unhealthy, remove from active tracking and try next
248 /// 6. If pool empty or all skipped/unhealthy, create new browser
249 ///
250 /// # Critical: Lock-Free Health Checks
251 ///
252 /// Health checks are performed WITHOUT holding locks to avoid blocking
253 /// other threads. This is why we use a loop pattern instead of iterator.
254 ///
255 /// # Returns
256 ///
257 /// [`BrowserHandle`] that auto-returns browser to pool when dropped.
258 ///
259 /// # Errors
260 ///
261 /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
262 /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
263 pub(crate) fn get_or_create_browser(self: &Arc<Self>) -> Result<BrowserHandle> {
264 log::debug!(" Attempting to get browser from pool...");
265
266 // Try to get from pool - LOOP pattern to avoid holding lock during health checks
267 // This is critical for concurrency: we release the lock between attempts
268 loop {
269 // Acquire lock briefly to pop one browser
270 let tracked_opt = {
271 let mut available = self.available.lock().unwrap();
272 let popped = available.pop();
273 log::trace!(" Pool size after pop: {}", available.len());
274 popped
275 }; // Lock released here - critical for performance
276
277 if let Some(tracked) = tracked_opt {
278 // === LOGIC START: Grace Period Check ===
279 let age = tracked.created_at().elapsed();
280 let ttl = self.config.browser_ttl;
281
282 // Safety margin matching your stagger interval
283 let safety_margin = Duration::from_secs(30);
284
285 // If browser is about to expire, don't use it.
286 if age + safety_margin > ttl {
287 log::debug!(
288 "⏳ Browser {} is near expiry (Age: {}s, Margin: 30s), skipping.",
289 tracked.id(),
290 age.as_secs()
291 );
292
293 // CRITICAL: We do NOT remove/recreate here.
294 // By simply 'continuing', we drop this 'tracked' instance.
295 // 1. It is NOT returned to 'available' (so no user gets it).
296 // 2. It REMAINS in 'active' (so the keep_alive thread still tracks it).
297 // 3. The keep_alive thread will see it expire and handle standard cleanup/replacement.
298 continue;
299 }
300 // === LOGIC END: Grace Period Check ===
301
302 log::debug!(" Testing browser {} from pool for health...", tracked.id());
303
304 // Detailed health check WITHOUT holding any locks
305 // This prevents blocking other threads during I/O
306 match tracked.browser().new_tab() {
307 Ok(tab) => {
308 log::trace!(
309 "✅ Browser {} health check: new_tab() successful",
310 tracked.id()
311 );
312
313 // Test navigation capability (full health check)
314 match tab
315 .navigate_to("data:text/html,<html><body>Health check</body></html>")
316 {
317 Ok(_) => {
318 log::trace!(
319 "✅ Browser {} health check: navigation successful",
320 tracked.id()
321 );
322
323 // Test cleanup capability
324 match tab.close(true) {
325 Ok(_) => {
326 log::debug!(
327 "✅ Browser {} passed full health check - ready for use",
328 tracked.id()
329 );
330
331 // Get pool size for logging (brief lock)
332 let pool_size = {
333 let available = self.available.lock().unwrap();
334 available.len()
335 };
336
337 log::info!(
338 "♻️ Reusing healthy browser {} from pool (pool size: {})",
339 tracked.id(),
340 pool_size
341 );
342
343 // Return healthy browser wrapped in RAII handle
344 return Ok(BrowserHandle::new(tracked, Arc::clone(self)));
345 }
346 Err(e) => {
347 log::warn!(
348 "❌ Browser {} health check: tab close failed: {}",
349 tracked.id(),
350 e
351 );
352 }
353 }
354 }
355 Err(e) => {
356 log::warn!(
357 "❌ Browser {} health check: navigation failed: {}",
358 tracked.id(),
359 e
360 );
361 }
362 }
363 }
364 Err(e) => {
365 log::warn!(
366 "❌ Browser {} health check: new_tab() failed: {}",
367 tracked.id(),
368 e
369 );
370 }
371 }
372
373 // If we reach here, health check failed
374 // Remove from active tracking (browser is dead)
375 log::warn!(
376 "️ Removing unhealthy browser {} from active tracking",
377 tracked.id()
378 );
379 {
380 let mut active = self.active.lock().unwrap();
381 active.remove(&tracked.id());
382 log::debug!(" Active browsers after removal: {}", active.len());
383 }
384
385 // Continue loop to try next browser in pool
386 log::debug!(" Trying next browser from pool...");
387 } else {
388 // Pool is empty, break to create new browser
389 log::debug!(" Pool is empty, will create new browser");
390 break;
391 }
392 }
393
394 // Pool is empty or no healthy browsers found
395 log::info!("️ Creating new browser (pool was empty or all browsers unhealthy)");
396
397 let tracked = self.create_browser_direct()?;
398
399 log::info!("✅ Returning newly created browser {}", tracked.id());
400 Ok(BrowserHandle::new(tracked, Arc::clone(self)))
401 }
402
403 /// Return a browser to the pool (called by BrowserHandle::drop).
404 ///
405 /// # Critical Lock Ordering
406 ///
407 /// Always acquires locks in order: active -> available.
408 /// Both locks are held together to prevent race conditions.
409 ///
410 /// # Algorithm
411 ///
412 /// 1. Acquire both locks (order: active, then available)
413 /// 2. Verify browser is in active tracking
414 /// 3. Check TTL - if expired, retire and trigger replacement
415 /// 4. If pool has space, add to available pool
416 /// 5. If pool full, remove from active (browser gets dropped)
417 ///
418 /// # Parameters
419 ///
420 /// * `self_arc` - Arc reference to self (needed for spawning async tasks).
421 /// * `tracked` - The browser being returned.
422 pub(crate) fn return_browser(self_arc: &Arc<Self>, tracked: TrackedBrowser) {
423 log::debug!(" Returning browser {} to pool...", tracked.id());
424
425 // Early exit if shutting down (don't waste time managing pool)
426 if self_arc.shutting_down.load(Ordering::Acquire) {
427 log::debug!(
428 " Pool shutting down, not returning browser {}",
429 tracked.id()
430 );
431 return;
432 }
433
434 // CRITICAL: Always acquire in order: active -> pool
435 // Holding both locks prevents ALL race conditions:
436 // - Prevents concurrent modifications to browser state
437 // - Prevents duplicate returns
438 // - Ensures pool size limits are respected
439 let mut active = self_arc.active.lock().unwrap();
440 let mut pool = self_arc.available.lock().unwrap();
441
442 // Verify browser is actually tracked (sanity check)
443 if !active.contains_key(&tracked.id()) {
444 log::warn!(
445 "❌ Browser {} not in active tracking (probably already removed), skipping return",
446 tracked.id()
447 );
448 return;
449 }
450
451 // Check TTL before returning to pool
452 // Expired browsers should be retired to prevent memory leaks
453 if tracked.is_expired(self_arc.config.browser_ttl) {
454 log::info!(
455 "⏰ Browser {} expired (age: {}min, TTL: {}min), retiring instead of returning",
456 tracked.id(),
457 tracked.age_minutes(),
458 self_arc.config.browser_ttl.as_secs() / 60
459 );
460
461 // Remove from active tracking
462 active.remove(&tracked.id());
463 log::debug!(" Active browsers after TTL retirement: {}", active.len());
464
465 // Release locks before spawning replacement task
466 drop(active);
467 drop(pool);
468
469 // Trigger async replacement creation (non-blocking)
470 log::debug!(" Triggering replacement browser creation for expired browser");
471 Self::spawn_replacement_creation(Arc::clone(self_arc), 1);
472 return;
473 }
474
475 // Prevent duplicate returns (defensive programming)
476 if pool.iter().any(|b| b.id() == tracked.id()) {
477 log::warn!(
478 "⚠️ Browser {} already in pool (duplicate return attempt), skipping",
479 tracked.id()
480 );
481 return;
482 }
483
484 // Check if pool has space for this browser
485 if pool.len() < self_arc.config.max_pool_size {
486 // Add to pool for reuse
487 pool.push(tracked.clone());
488 log::info!(
489 "♻️ Browser {} returned to pool (pool size: {}/{})",
490 tracked.id(),
491 pool.len(),
492 self_arc.config.max_pool_size
493 );
494 } else {
495 // Pool is full, remove from tracking (browser will be dropped)
496 log::debug!(
497 "️ Pool full ({}/{}), removing browser {} from system",
498 pool.len(),
499 self_arc.config.max_pool_size,
500 tracked.id()
501 );
502 active.remove(&tracked.id());
503 log::debug!(" Active browsers after removal: {}", active.len());
504 }
505 }
506
507 /// Asynchronously create replacement browsers (internal helper).
508 ///
509 /// This is the async work function that actually creates browsers.
510 /// It's spawned as a tokio task by `spawn_replacement_creation`.
511 ///
512 /// # Algorithm
513 ///
514 /// 1. Check shutdown flag before each creation
515 /// 2. Check pool space before each creation
516 /// 3. Use spawn_blocking for CPU-bound browser creation
517 /// 4. Add successful browsers to pool
518 /// 5. Log detailed status
519 ///
520 /// # Parameters
521 ///
522 /// * `inner` - Arc reference to pool state.
523 /// * `count` - Number of browsers to attempt to create.
524 async fn spawn_replacement_creation_async(inner: Arc<Self>, count: usize) {
525 log::info!(
526 " Starting async replacement creation for {} browsers",
527 count
528 );
529
530 let mut created_count = 0;
531 let mut failed_count = 0;
532
533 for i in 0..count {
534 // Check shutdown flag before each expensive operation
535 if inner.shutting_down.load(Ordering::Acquire) {
536 log::info!(
537 " Shutdown detected during replacement creation, stopping at {}/{}",
538 i,
539 count
540 );
541 break;
542 }
543
544 // Check if pool has space BEFORE creating (avoid wasted work)
545 let pool_has_space = {
546 let pool = inner.available.lock().unwrap();
547 let has_space = pool.len() < inner.config.max_pool_size;
548 log::trace!(
549 " Pool space check: {}/{} (has space: {})",
550 pool.len(),
551 inner.config.max_pool_size,
552 has_space
553 );
554 has_space
555 };
556
557 if !pool_has_space {
558 log::warn!(
559 "⚠️ Pool is full, stopping replacement creation at {}/{}",
560 i,
561 count
562 );
563 break;
564 }
565
566 log::debug!("️ Creating replacement browser {}/{}", i + 1, count);
567
568 // Use spawn_blocking for CPU-bound browser creation
569 // This prevents blocking the async runtime
570 let inner_clone = Arc::clone(&inner);
571 let result =
572 tokio::task::spawn_blocking(move || inner_clone.create_browser_direct()).await;
573
574 match result {
575 Ok(Ok(tracked)) => {
576 let id = tracked.id();
577
578 // Add to pool (with space check to handle race conditions)
579 let mut pool = inner.available.lock().unwrap();
580
581 // Double-check space (another thread might have added browsers)
582 if pool.len() < inner.config.max_pool_size {
583 pool.push(tracked);
584 created_count += 1;
585 log::info!(
586 "✅ Created replacement browser {} and added to pool ({}/{})",
587 id,
588 i + 1,
589 count
590 );
591 } else {
592 log::warn!(
593 "⚠️ Pool became full during creation, replacement browser {} kept in active only",
594 id
595 );
596 created_count += 1; // Still count as created (just not pooled)
597 }
598 }
599 Ok(Err(e)) => {
600 failed_count += 1;
601 log::error!(
602 "❌ Failed to create replacement browser {}/{}: {}",
603 i + 1,
604 count,
605 e
606 );
607 }
608 Err(e) => {
609 failed_count += 1;
610 log::error!(
611 "❌ Replacement browser {}/{} task panicked: {:?}",
612 i + 1,
613 count,
614 e
615 );
616 }
617 }
618 }
619
620 // Final status report
621 let pool_size = inner.available.lock().unwrap().len();
622 let active_size = inner.active.lock().unwrap().len();
623
624 log::info!(
625 " Replacement creation completed: {}/{} created, {} failed. Pool: {}, Active: {}",
626 created_count,
627 count,
628 failed_count,
629 pool_size,
630 active_size
631 );
632 }
633
634 /// Spawn a background task to create replacement browsers.
635 ///
636 /// This is non-blocking and returns immediately. The actual browser
637 /// creation happens in a tokio task tracked in `replacement_tasks`.
638 ///
639 /// # Why Async
640 ///
641 /// Browser creation is slow (1-3 seconds per browser). Spawning async
642 /// tasks prevents blocking the caller.
643 ///
644 /// # Task Tracking
645 ///
646 /// Tasks are tracked so we can abort them during shutdown.
647 ///
648 /// # Parameters
649 ///
650 /// * `inner` - Arc reference to pool state.
651 /// * `count` - Number of replacement browsers to create.
652 pub(crate) fn spawn_replacement_creation(inner: Arc<Self>, count: usize) {
653 log::info!(
654 " Spawning async task to create {} replacement browsers",
655 count
656 );
657
658 // Clone Arc for moving into async task
659 let inner_for_task = Arc::clone(&inner);
660
661 // Spawn async task on the captured runtime
662 let task_handle = inner.runtime_handle.spawn(async move {
663 Self::spawn_replacement_creation_async(inner_for_task, count).await;
664 });
665
666 // Track task handle for shutdown cleanup
667 if let Ok(mut tasks) = inner.replacement_tasks.lock() {
668 // Clean up finished tasks while we have the lock (housekeeping)
669 let original_count = tasks.len();
670 tasks.retain(|h| !h.is_finished());
671 let cleaned = original_count - tasks.len();
672
673 if cleaned > 0 {
674 log::trace!("粒 Cleaned up {} finished replacement tasks", cleaned);
675 }
676
677 // Add new task
678 tasks.push(task_handle);
679
680 log::debug!(" Now tracking {} active replacement tasks", tasks.len());
681 } else {
682 log::warn!("⚠️ Failed to track replacement task (poisoned lock)");
683 }
684 }
685
    /// Get the pool configuration.
    ///
    /// The configuration is immutable after pool creation.
    #[inline]
    pub(crate) fn config(&self) -> &BrowserPoolConfig {
        &self.config
    }
691
    /// Check if the pool is shutting down.
    ///
    /// `Acquire` ordering pairs with the `Release` store in
    /// `set_shutting_down`, so state written before shutdown began is
    /// visible to the caller.
    #[inline]
    pub(crate) fn is_shutting_down(&self) -> bool {
        self.shutting_down.load(Ordering::Acquire)
    }
697
    /// Set the shutdown flag.
    ///
    /// `Release` ordering pairs with the `Acquire` loads performed before
    /// expensive operations throughout the pool.
    #[inline]
    pub(crate) fn set_shutting_down(&self, value: bool) {
        self.shutting_down.store(value, Ordering::Release);
    }
703
    /// Get the shutdown signal for the keep-alive thread.
    ///
    /// The (flag, condvar) pair lets shutdown wake the keep-alive thread
    /// immediately instead of it sleeping out a full ping interval.
    #[inline]
    pub(crate) fn shutdown_signal(&self) -> &Arc<(Mutex<bool>, Condvar)> {
        &self.shutdown_signal
    }
709
710 /// Get the available browsers count.
711 pub(crate) fn available_count(&self) -> usize {
712 self.available.lock().map(|g| g.len()).unwrap_or(0)
713 }
714
715 /// Get the active browsers count.
716 pub(crate) fn active_count(&self) -> usize {
717 self.active.lock().map(|g| g.len()).unwrap_or(0)
718 }
719
720 /// Get a snapshot of active browsers for health checking.
721 ///
722 /// Returns a cloned list to avoid holding locks during I/O.
723 pub(crate) fn get_active_browsers_snapshot(&self) -> Vec<(u64, TrackedBrowser)> {
724 let active = self.active.lock().unwrap();
725 active
726 .iter()
727 .map(|(id, tracked)| (*id, tracked.clone()))
728 .collect()
729 }
730
731 /// Remove a browser from active tracking.
732 pub(crate) fn remove_from_active(&self, id: u64) -> Option<TrackedBrowser> {
733 let mut active = self.active.lock().unwrap();
734 active.remove(&id)
735 }
736
737 /// Remove browsers from the available pool by ID.
738 pub(crate) fn remove_from_available(&self, ids: &[u64]) {
739 let mut pool = self.available.lock().unwrap();
740 let original_size = pool.len();
741 pool.retain(|b| !ids.contains(&b.id()));
742 let removed = original_size - pool.len();
743 if removed > 0 {
744 log::debug!("️ Removed {} browsers from available pool", removed);
745 }
746 }
747
748 /// Abort all replacement tasks.
749 pub(crate) fn abort_replacement_tasks(&self) -> usize {
750 if let Ok(mut tasks) = self.replacement_tasks.lock() {
751 let count = tasks.len();
752 for handle in tasks.drain(..) {
753 handle.abort();
754 }
755 count
756 } else {
757 0
758 }
759 }
760}
761
762// ============================================================================
763// BrowserPool
764// ============================================================================
765
/// Main browser pool with lifecycle management.
///
/// This is the public-facing API for the browser pool. It wraps the internal
/// state and manages the keep-alive thread.
///
/// # Overview
///
/// `BrowserPool` provides:
/// - Browser checkout via [`get()`](Self::get)
/// - Pool warmup via [`warmup()`](Self::warmup)
/// - Statistics via [`stats()`](Self::stats)
/// - Graceful shutdown via `shutdown_async()`
///
/// # Example
///
/// ```rust,no_run
/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
/// use std::time::Duration;
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     // Create pool
///     let mut pool = BrowserPool::builder()
///         .config(
///             BrowserPoolConfigBuilder::new()
///                 .max_pool_size(5)
///                 .warmup_count(3)
///                 .build()?
///         )
///         .factory(Box::new(ChromeBrowserFactory::with_defaults()))
///         .build()?;
///
///     // Warmup
///     pool.warmup().await?;
///
///     // Use browsers
///     {
///         let browser = pool.get()?;
///         let tab = browser.new_tab()?;
///         // ... do work ...
///     } // browser returned to pool automatically
///
///     // Shutdown
///     pool.shutdown_async().await;
///
///     Ok(())
/// }
/// ```
///
/// # Thread Safety
///
/// `BrowserPool` is intended to be `Send` so it can be wrapped in
/// `Arc<Mutex<>>` for sharing across threads (this ultimately depends on the
/// supplied `BrowserFactory` implementation — confirm for custom factories).
/// Use [`into_shared()`](Self::into_shared) for convenience.
pub struct BrowserPool {
    /// Shared internal state.
    inner: Arc<BrowserPoolInner>,

    /// Handle to keep-alive monitoring thread.
    ///
    /// Option allows taking during shutdown. None means keep-alive disabled.
    keep_alive_handle: Option<JoinHandle<()>>,
}
828
829impl BrowserPool {
    /// Convert pool into a shared `Arc<Mutex<>>` for use in web handlers.
    ///
    /// This is convenient for web frameworks that need shared state owned
    /// by multiple request handlers.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let pool = BrowserPool::builder()
    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
    ///     .build()?
    ///     .into_shared();
    ///
    /// // Can now be cloned and shared across handlers
    /// let pool_clone = Arc::clone(&pool);
    /// ```
    pub fn into_shared(self) -> Arc<Mutex<BrowserPool>> {
        log::debug!(" Converting BrowserPool into shared Arc<Mutex<>>");
        Arc::new(Mutex::new(self))
    }
849
    /// Create a new builder for constructing a BrowserPool.
    ///
    /// This is the recommended way to create a pool.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let pool = BrowserPool::builder()
    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
    ///     .build()?;
    /// ```
    pub fn builder() -> BrowserPoolBuilder {
        BrowserPoolBuilder::new()
    }
864
    /// Get a browser from the pool (or create one if empty).
    ///
    /// Returns a [`BrowserHandle`] that implements `Deref<Target=Browser>`,
    /// allowing transparent access to browser methods.
    ///
    /// # Automatic Return
    ///
    /// The browser is automatically returned to the pool when the handle
    /// is dropped, even if your code panics (RAII pattern).
    ///
    /// # Errors
    ///
    /// - Returns [`BrowserPoolError::ShuttingDown`] if pool is shutting down.
    /// - Returns [`BrowserPoolError::BrowserCreation`] if new browser creation fails.
    ///
    /// NOTE(review): unhealthy pooled browsers are silently discarded and
    /// replaced with a freshly created browser rather than reported as
    /// `HealthCheckFailed` — confirm whether that variant can actually
    /// surface from this call.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let browser = pool.get()?;
    /// let tab = browser.new_tab()?;
    /// tab.navigate_to("https://example.com")?;
    /// // browser returned automatically when it goes out of scope
    /// ```
    pub fn get(&self) -> Result<BrowserHandle> {
        log::trace!(" BrowserPool::get() called");
        self.inner.get_or_create_browser()
    }
893
894 /// Get pool statistics snapshot.
895 ///
896 /// # Returns
897 ///
898 /// [`PoolStats`] containing:
899 /// - `available`: Browsers in pool ready for checkout
900 /// - `active`: All browsers (pooled + checked out)
901 /// - `total`: Currently same as `active` (for future expansion)
902 ///
903 /// # Example
904 ///
905 /// ```rust,ignore
906 /// let stats = pool.stats();
907 /// println!("Available: {}, Active: {}", stats.available, stats.active);
908 /// ```
909 pub fn stats(&self) -> PoolStats {
910 let available = self.inner.available_count();
911 let active = self.inner.active_count();
912
913 log::trace!(" Pool stats: available={}, active={}", available, active);
914
915 PoolStats {
916 available,
917 active,
918 total: active,
919 }
920 }
921
    /// Get a reference to the pool configuration.
    ///
    /// Returns the configuration that was used to create this pool.
    /// The configuration is immutable after pool creation.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let pool = BrowserPool::builder()
    ///     .config(
    ///         BrowserPoolConfigBuilder::new()
    ///             .max_pool_size(10)
    ///             .build()?
    ///     )
    ///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
    ///     .build()?;
    ///
    /// println!("Max pool size: {}", pool.config().max_pool_size);
    /// println!("Browser TTL: {:?}", pool.config().browser_ttl);
    /// ```
    ///
    /// # Use Cases
    ///
    /// - Logging configuration at startup
    /// - Monitoring/metrics collection
    /// - Readiness checks (comparing active count vs max_pool_size)
    /// - Debugging pool behavior
    #[inline]
    pub fn config(&self) -> &BrowserPoolConfig {
        self.inner.config()
    }
953
954 /// Warmup the pool by pre-creating browsers.
955 ///
956 /// This is highly recommended to reduce first-request latency.
957 /// Should be called during application startup.
958 ///
959 /// # Process
960 ///
961 /// 1. Creates `warmup_count` browsers sequentially with staggered timing
962 /// 2. Tests each browser with navigation
963 /// 3. Returns all browsers to pool
964 /// 4. Entire process has timeout (configurable via `warmup_timeout`)
965 ///
966 /// # Staggered Creation
967 ///
968 /// Browsers are created with a 30-second delay between them to ensure
969 /// their TTLs are offset. This prevents all browsers from expiring
970 /// at the same time.
971 ///
972 /// # Errors
973 ///
974 /// - Returns error if warmup times out.
975 /// - Returns error if browser creation fails.
976 ///
977 /// # Example
978 ///
979 /// ```rust,ignore
980 /// let pool = BrowserPool::builder()
981 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
982 /// .build()?;
983 ///
984 /// // Warmup during startup
985 /// pool.warmup().await?;
986 /// ```
987 pub async fn warmup(&self) -> Result<()> {
988 let count = self.inner.config().warmup_count;
989 let warmup_timeout = self.inner.config().warmup_timeout;
990
991 log::info!(
992 " Starting browser pool warmup with {} instances (timeout: {}s)",
993 count,
994 warmup_timeout.as_secs()
995 );
996
997 // Wrap entire warmup in timeout to prevent hanging forever
998 let warmup_result = tokio::time::timeout(warmup_timeout, self.warmup_internal(count)).await;
999
1000 match warmup_result {
1001 Ok(Ok(())) => {
1002 let stats = self.stats();
1003 log::info!(
1004 "✅ Warmup completed successfully - Available: {}, Active: {}",
1005 stats.available,
1006 stats.active
1007 );
1008 Ok(())
1009 }
1010 Ok(Err(e)) => {
1011 log::error!("❌ Warmup failed with error: {}", e);
1012 Err(e)
1013 }
1014 Err(_) => {
1015 log::error!("❌ Warmup timed out after {}s", warmup_timeout.as_secs());
1016 Err(BrowserPoolError::Configuration(format!(
1017 "Warmup timed out after {}s",
1018 warmup_timeout.as_secs()
1019 )))
1020 }
1021 }
1022 }
1023
    /// Internal warmup implementation (separated for cleaner timeout wrapping).
    ///
    /// Creates browsers sequentially with a delay between them.
    /// This ensures they don't all reach their TTL (expiration) at the exact same moment.
    ///
    /// Note: total runtime is roughly `(count - 1) * 30s` of stagger delay plus
    /// creation/validation time, all of which must fit inside the caller's
    /// `warmup_timeout` or `warmup()` will report a timeout.
    async fn warmup_internal(&self, count: usize) -> Result<()> {
        log::debug!(" Starting internal warmup process for {} browsers", count);

        // STAGGER CONFIGURATION
        // We wait this long between creations to distribute expiration times,
        // so all warmup browsers don't hit their TTL at once later.
        let stagger_interval = Duration::from_secs(30);

        // Handles keep validated browsers checked out until warmup finishes;
        // dropping them at the end returns the browsers to the pool (RAII).
        let mut handles = Vec::new();
        let mut created_count = 0;
        let mut failed_count = 0;

        for i in 0..count {
            log::debug!(" Creating startup browser instance {}/{}", i + 1, count);

            // Per-browser timeout (15s per browser is reasonable)
            // This prevents one slow browser from blocking entire warmup.
            // Creation is blocking I/O, so it runs on tokio's blocking pool.
            // NOTE(review): if the 15s timeout fires, the spawn_blocking closure keeps
            // running in the background and may still create a browser — confirm that
            // `create_browser_direct` cleans up (or registers) it sensibly in that case.
            let browser_result = tokio::time::timeout(
                Duration::from_secs(15),
                tokio::task::spawn_blocking({
                    let inner = Arc::clone(&self.inner);
                    move || inner.create_browser_direct()
                }),
            )
            .await;

            // Three nested layers: timeout -> task join (panic) -> creation result.
            match browser_result {
                Ok(Ok(Ok(tracked))) => {
                    log::debug!(
                        "✅ Browser {} created, performing validation test...",
                        tracked.id()
                    );

                    // Test the browser with actual navigation
                    match tracked.browser().new_tab() {
                        Ok(tab) => {
                            log::trace!("✅ Browser {} test: new_tab() successful", tracked.id());

                            // Navigate to test page (a data: URL, so no network is needed)
                            let nav_result = tab.navigate_to(
                                "data:text/html,<html><body>Warmup test</body></html>",
                            );
                            // A navigation failure is only a warning: the browser is
                            // still kept, since new_tab() already proved it responsive.
                            if let Err(e) = nav_result {
                                log::warn!(
                                    "⚠️ Browser {} test navigation failed: {}",
                                    tracked.id(),
                                    e
                                );
                            } else {
                                log::trace!(
                                    "✅ Browser {} test: navigation successful",
                                    tracked.id()
                                );
                            }

                            // Clean up test tab
                            let _ = tab.close(true);

                            // Keep handle so browser stays alive until end of warmup
                            handles.push(BrowserHandle::new(tracked, Arc::clone(&self.inner)));

                            created_count += 1;
                            log::info!(
                                "✅ Browser instance {}/{} ready and validated",
                                i + 1,
                                count
                            );
                        }
                        Err(e) => {
                            failed_count += 1;
                            log::error!(
                                "❌ Browser {} validation test failed: {}",
                                tracked.id(),
                                e
                            );

                            // Remove from active tracking since it's broken
                            self.inner.remove_from_active(tracked.id());
                        }
                    }
                }
                // Factory returned an error while creating the browser.
                Ok(Ok(Err(e))) => {
                    failed_count += 1;
                    log::error!("❌ Failed to create browser {}/{}: {}", i + 1, count, e);
                }
                // The spawn_blocking task itself panicked.
                Ok(Err(e)) => {
                    failed_count += 1;
                    log::error!(
                        "❌ Browser {}/{} creation task panicked: {:?}",
                        i + 1,
                        count,
                        e
                    );
                }
                // Per-browser 15s timeout elapsed.
                Err(_) => {
                    failed_count += 1;
                    log::error!(
                        "❌ Browser {}/{} creation timed out (15s limit)",
                        i + 1,
                        count
                    );
                }
            }

            // === STAGGER LOGIC ===
            // If this is not the last browser, wait before creating the next one.
            // This ensures their TTLs are offset by `stagger_interval`.
            // (`count - 1` cannot underflow: the loop body only runs when count > 0.)
            if i < count - 1 {
                log::info!(
                    "⏳ Waiting {}s before creating next warmup browser to stagger TTLs...",
                    stagger_interval.as_secs()
                );
                tokio::time::sleep(stagger_interval).await;
            }
        }

        log::info!(
            " Warmup creation phase: {} created, {} failed",
            created_count,
            failed_count
        );

        // Return all browsers to pool by dropping handles (their Drop impl
        // hands each browser back to the pool).
        log::debug!(" Returning {} warmup browsers to pool...", handles.len());
        drop(handles);

        // Small delay to ensure Drop handlers complete before we read stats
        tokio::time::sleep(Duration::from_millis(300)).await;

        let final_stats = self.stats();
        log::info!(
            " Warmup internal completed - Pool: {}, Active: {}",
            final_stats.available,
            final_stats.active
        );

        Ok(())
    }
1165
    /// Start the keep-alive monitoring thread.
    ///
    /// This background thread:
    /// - Pings all active browsers periodically
    /// - Removes unresponsive browsers after max_ping_failures
    /// - Retires browsers that exceed TTL
    /// - Spawns replacement browsers as needed
    ///
    /// # Critical Design Notes
    ///
    /// - Uses condvar for immediate shutdown signaling (no sleep-polling)
    /// - Never holds locks during I/O operations (pings run on a snapshot)
    /// - Uses consistent lock ordering (active -> pool)
    ///
    /// # Parameters
    ///
    /// * `inner` - Arc reference to pool state.
    ///
    /// # Returns
    ///
    /// JoinHandle for the background thread (an OS thread, not a tokio task).
    fn start_keep_alive(inner: Arc<BrowserPoolInner>) -> JoinHandle<()> {
        // Snapshot config up front; these values are fixed for the pool's lifetime.
        let ping_interval = inner.config().ping_interval;
        let max_failures = inner.config().max_ping_failures;
        let browser_ttl = inner.config().browser_ttl;
        let shutdown_signal = Arc::clone(inner.shutdown_signal());

        log::info!(
            " Starting keep-alive thread (interval: {}s, max failures: {}, TTL: {}min)",
            ping_interval.as_secs(),
            max_failures,
            browser_ttl.as_secs() / 60
        );

        thread::spawn(move || {
            log::info!(" Keep-alive thread started successfully");

            // Track consecutive failures per browser ID.
            // Local to this thread; no locking required.
            let mut failure_counts: HashMap<u64, u32> = HashMap::new();

            loop {
                // Wait for next ping interval OR shutdown signal (whichever comes first)
                // Using condvar instead of sleep allows immediate wake-up on shutdown
                let (lock, cvar) = &*shutdown_signal;
                let wait_result = {
                    let shutdown = lock.lock().unwrap();
                    cvar.wait_timeout(shutdown, ping_interval).unwrap()
                };

                // wait_result.0 is the (re-acquired) guard over the shutdown flag;
                // wait_result.1 tells us whether we woke by timeout or by notify.
                let shutdown_flag = *wait_result.0;
                let timed_out = wait_result.1.timed_out();

                // Check if we were signaled to shutdown
                if shutdown_flag {
                    log::info!(" Keep-alive received shutdown signal via condvar");
                    break;
                }

                // Double-check atomic shutdown flag (belt and suspenders)
                if inner.is_shutting_down() {
                    log::info!(" Keep-alive detected shutdown via atomic flag");
                    break;
                }

                // If spuriously woken (not timeout, not shutdown), continue waiting.
                // Condvars may wake without either condition holding.
                if !timed_out {
                    log::trace!("⏰ Keep-alive spuriously woken, continuing wait...");
                    continue;
                }

                log::trace!(" Keep-alive ping cycle starting...");

                // Collect browsers to ping WITHOUT holding locks
                // This is critical: we clone the list and release the lock
                // before doing any I/O operations
                let browsers_to_ping = inner.get_active_browsers_snapshot();
                log::trace!(
                    "Keep-alive checking {} active browsers",
                    browsers_to_ping.len()
                );

                // Now ping browsers without holding any locks.
                // Failed and expired browsers are only collected here; the actual
                // cleanup happens after the loop, again without I/O under locks.
                let mut to_remove = Vec::new();
                let mut expired_browsers = Vec::new();

                for (id, tracked) in browsers_to_ping {
                    // Check shutdown during ping loop (allows early exit)
                    if inner.is_shutting_down() {
                        log::info!("Shutdown detected during ping loop, exiting immediately");
                        return;
                    }

                    // Check TTL before pinging (no point pinging expired browsers)
                    if tracked.is_expired(browser_ttl) {
                        log::info!(
                            "Browser {} expired (age: {}min, TTL: {}min), marking for retirement",
                            id,
                            tracked.age_minutes(),
                            browser_ttl.as_secs() / 60
                        );
                        expired_browsers.push(id);
                        continue; // Skip ping for expired browsers
                    }

                    // Perform health check (this is I/O, no locks held)
                    use crate::traits::Healthcheck;
                    match tracked.ping() {
                        Ok(_) => {
                            // Reset failure count on success: only *consecutive*
                            // failures count toward removal
                            if failure_counts.remove(&id).is_some() {
                                log::debug!("Browser {} ping successful, failure count reset", id);
                            }
                        }
                        Err(e) => {
                            // Only process failures if NOT shutting down
                            // (during shutdown, browsers may legitimately fail)
                            if !inner.is_shutting_down() {
                                let failures = failure_counts.entry(id).or_insert(0);
                                *failures += 1;

                                log::warn!(
                                    "Browser {} ping failed (attempt {}/{}): {}",
                                    id,
                                    failures,
                                    max_failures,
                                    e
                                );

                                // Remove if exceeded max failures
                                if *failures >= max_failures {
                                    log::error!(
                                        "Browser {} exceeded max ping failures ({}), marking for removal",
                                        id,
                                        max_failures
                                    );
                                    to_remove.push(id);
                                }
                            }
                        }
                    }
                }

                // Check shutdown before cleanup (avoid work if shutting down)
                if inner.is_shutting_down() {
                    log::info!("Shutdown detected before cleanup, skipping and exiting");
                    break;
                }

                // Handle TTL retirements first (they need replacement browsers)
                if !expired_browsers.is_empty() {
                    log::info!("Processing {} TTL-expired browsers", expired_browsers.len());
                    Self::handle_browser_retirement(&inner, expired_browsers, &mut failure_counts);
                }

                // Handle failed browsers (remove from tracking and pool)
                if !to_remove.is_empty() {
                    log::warn!("Removing {} failed browsers from pool", to_remove.len());

                    // Track how many were actually removed so we know how many to replace
                    let mut actual_removed_count = 0;

                    // Remove dead browsers from active tracking
                    // (active first — consistent with the module-wide lock ordering)
                    for id in &to_remove {
                        if inner.remove_from_active(*id).is_some() {
                            actual_removed_count += 1;
                            log::debug!("Removed failed browser {} from active tracking", id);
                        }
                        failure_counts.remove(id);
                    }

                    log::debug!(
                        "Active browsers after failure cleanup: {}",
                        inner.active_count()
                    );

                    // Clean up pool (remove dead browsers)
                    inner.remove_from_available(&to_remove);

                    log::debug!("Pool size after cleanup: {}", inner.available_count());

                    // Trigger replacement for the browsers we just removed
                    if actual_removed_count > 0 {
                        log::info!(
                            "Spawning {} replacement browsers for failed ones",
                            actual_removed_count
                        );
                        BrowserPoolInner::spawn_replacement_creation(
                            Arc::clone(&inner),
                            actual_removed_count,
                        );
                    }
                }

                // Log keep-alive cycle summary
                log::debug!(
                    "Keep-alive cycle complete - Active: {}, Pooled: {}, Tracking {} failure states",
                    inner.active_count(),
                    inner.available_count(),
                    failure_counts.len()
                );
            }

            log::info!("Keep-alive thread exiting cleanly");
        })
    }
1371
1372 /// Handle browser retirement due to TTL expiration.
1373 ///
1374 /// This function:
1375 /// 1. Removes expired browsers from active and pool tracking
1376 /// 2. Spawns async tasks to create replacement browsers
1377 /// 3. Maintains pool target size
1378 ///
1379 /// # Critical Lock Ordering
1380 ///
1381 /// Acquires active -> pool locks together to prevent races.
1382 ///
1383 /// # Parameters
1384 ///
1385 /// * `inner` - Arc reference to pool state.
1386 /// * `expired_ids` - List of browser IDs that have exceeded TTL.
1387 /// * `failure_counts` - Mutable map of failure counts (updated to remove retired browsers).
1388 fn handle_browser_retirement(
1389 inner: &Arc<BrowserPoolInner>,
1390 expired_ids: Vec<u64>,
1391 failure_counts: &mut HashMap<u64, u32>,
1392 ) {
1393 log::info!(
1394 "Retiring {} expired browsers (TTL enforcement)",
1395 expired_ids.len()
1396 );
1397
1398 // Remove expired browsers from active tracking
1399 let mut retired_count = 0;
1400 for id in &expired_ids {
1401 if inner.remove_from_active(*id).is_some() {
1402 retired_count += 1;
1403 log::debug!("Removed expired browser {} from active tracking", id);
1404 }
1405 // Clean up failure tracking
1406 failure_counts.remove(id);
1407 }
1408
1409 // Remove from pool as well
1410 inner.remove_from_available(&expired_ids);
1411
1412 log::debug!(
1413 "After retirement - Active: {}, Pooled: {}",
1414 inner.active_count(),
1415 inner.available_count()
1416 );
1417
1418 // Create replacement browsers to maintain target count
1419 if retired_count > 0 {
1420 log::info!(
1421 "Spawning {} replacement browsers for retired ones",
1422 retired_count
1423 );
1424 BrowserPoolInner::spawn_replacement_creation(Arc::clone(inner), retired_count);
1425 } else {
1426 log::debug!("No browsers were actually retired (already removed)");
1427 }
1428 }
1429
1430 /// Asynchronously shutdown the pool (recommended method).
1431 ///
1432 /// This is the preferred shutdown method as it can properly await
1433 /// async task cancellation. Should be called during application shutdown.
1434 ///
1435 /// # Shutdown Process
1436 ///
1437 /// 1. Set atomic shutdown flag (stops new operations)
1438 /// 2. Signal condvar to wake keep-alive thread immediately
1439 /// 3. Wait for keep-alive thread to exit (with timeout)
1440 /// 4. Abort all replacement creation tasks
1441 /// 5. Wait briefly for cleanup
1442 /// 6. Log final statistics
1443 ///
1444 /// # Timeout
1445 ///
1446 /// Keep-alive thread is given 5 seconds to exit gracefully.
1447 /// If it doesn't exit, we log an error but continue shutdown.
1448 ///
1449 /// # Example
1450 ///
1451 /// ```rust,ignore
1452 /// let mut pool = /* ... */;
1453 ///
1454 /// // During application shutdown
1455 /// pool.shutdown_async().await;
1456 /// ```
1457 pub async fn shutdown_async(&mut self) {
1458 log::info!("Shutting down browser pool (async mode)...");
1459
1460 // Step 1: Set shutdown flag (prevents new operations)
1461 self.inner.set_shutting_down(true);
1462 log::debug!("Shutdown flag set");
1463
1464 // Step 2: Signal condvar to wake keep-alive thread immediately
1465 // This is critical - without this, keep-alive waits for full ping_interval
1466 {
1467 let (lock, cvar) = &**self.inner.shutdown_signal();
1468 let mut shutdown = lock.lock().unwrap();
1469 *shutdown = true;
1470 cvar.notify_all();
1471 log::debug!("Shutdown signal sent to keep-alive thread");
1472 } // Lock released here
1473
1474 // Step 3: Wait for keep-alive thread to exit
1475 if let Some(handle) = self.keep_alive_handle.take() {
1476 log::debug!("Waiting for keep-alive thread to exit...");
1477
1478 // Wrap thread join in spawn_blocking to make it async-friendly
1479 let join_task = tokio::task::spawn_blocking(move || handle.join());
1480
1481 // Give it 5 seconds to exit gracefully
1482 match tokio::time::timeout(Duration::from_secs(5), join_task).await {
1483 Ok(Ok(Ok(_))) => {
1484 log::info!("Keep-alive thread stopped cleanly");
1485 }
1486 Ok(Ok(Err(_))) => {
1487 log::error!("Keep-alive thread panicked during shutdown");
1488 }
1489 Ok(Err(_)) => {
1490 log::error!("Keep-alive join task panicked");
1491 }
1492 Err(_) => {
1493 log::error!("Keep-alive thread didn't exit within 5s timeout");
1494 }
1495 }
1496 } else {
1497 log::debug!("No keep-alive thread to stop (was disabled or already stopped)");
1498 }
1499
1500 // Step 4: Abort all replacement creation tasks
1501 log::info!("Aborting replacement creation tasks...");
1502 let aborted_count = self.inner.abort_replacement_tasks();
1503 if aborted_count > 0 {
1504 log::info!("Aborted {} replacement tasks", aborted_count);
1505 } else {
1506 log::debug!("No replacement tasks to abort");
1507 }
1508
1509 // Step 5: Small delay to let aborted tasks clean up
1510 tokio::time::sleep(Duration::from_millis(100)).await;
1511
1512 // Step 6: Log final statistics
1513 let stats = self.stats();
1514 log::info!(
1515 "Async shutdown complete - Available: {}, Active: {}, Total: {}",
1516 stats.available,
1517 stats.active,
1518 stats.total
1519 );
1520 }
1521
    /// Synchronously shutdown the pool (fallback method).
    ///
    /// This is a simplified shutdown for use in Drop or non-async contexts.
    /// Prefer [`shutdown_async()`](Self::shutdown_async) when possible for cleaner task cancellation.
    ///
    /// # Note
    ///
    /// This method doesn't wait for replacement tasks to finish since
    /// there's no async runtime available. Tasks are aborted but may not
    /// have cleaned up yet.
    ///
    /// Calling this more than once is harmless: the second call re-signals
    /// shutdown and finds no keep-alive handle left to join.
    pub fn shutdown(&mut self) {
        log::debug!("Calling synchronous shutdown...");
        // Thin wrapper so Drop and external callers share one code path.
        self.shutdown_sync();
    }
1536
1537 /// Internal synchronous shutdown implementation.
1538 fn shutdown_sync(&mut self) {
1539 log::info!("Shutting down browser pool (sync mode)...");
1540
1541 // Set shutdown flag
1542 self.inner.set_shutting_down(true);
1543 log::debug!("Shutdown flag set");
1544
1545 // Signal condvar (same as async version)
1546 {
1547 let (lock, cvar) = &**self.inner.shutdown_signal();
1548 let mut shutdown = lock.lock().unwrap();
1549 *shutdown = true;
1550 cvar.notify_all();
1551 log::debug!("Shutdown signal sent");
1552 }
1553
1554 // Wait for keep-alive thread
1555 if let Some(handle) = self.keep_alive_handle.take() {
1556 log::debug!("Joining keep-alive thread (sync)...");
1557
1558 match handle.join() {
1559 Ok(_) => log::info!("Keep-alive thread stopped"),
1560 Err(_) => log::error!("Keep-alive thread panicked"),
1561 }
1562 }
1563
1564 // Abort replacement tasks (best effort - they won't make progress without runtime)
1565 let aborted_count = self.inner.abort_replacement_tasks();
1566 if aborted_count > 0 {
1567 log::debug!("Aborted {} replacement tasks (sync mode)", aborted_count);
1568 }
1569
1570 let stats = self.stats();
1571 log::info!(
1572 "Sync shutdown complete - Available: {}, Active: {}",
1573 stats.available,
1574 stats.active
1575 );
1576 }
1577
    /// Get a reference to the inner pool state.
    ///
    /// This is primarily for internal use and testing; the shared state holds
    /// the available/active browser bookkeeping and replacement tasks.
    #[doc(hidden)]
    #[allow(dead_code)] // kept for tests and future internal callers
    pub(crate) fn inner(&self) -> &Arc<BrowserPoolInner> {
        &self.inner
    }
1586}
1587
1588impl Drop for BrowserPool {
1589 /// Automatic cleanup when pool is dropped.
1590 ///
1591 /// This ensures resources are released even if shutdown wasn't called explicitly.
1592 /// Uses sync shutdown since Drop can't be async.
1593 fn drop(&mut self) {
1594 log::debug!("� BrowserPool Drop triggered - running cleanup");
1595
1596 // Only shutdown if not already done
1597 if !self.inner.is_shutting_down() {
1598 log::warn!("� BrowserPool dropped without explicit shutdown - cleaning up");
1599 self.shutdown();
1600 } else {
1601 log::debug!(" Pool already shutdown, Drop is no-op");
1602 }
1603 }
1604}
1605
1606// ============================================================================
1607// BrowserPoolBuilder
1608// ============================================================================
1609
/// Builder for constructing a [`BrowserPool`] with validation.
///
/// This is the recommended way to create a pool as it validates
/// configuration and provides sensible defaults.
///
/// # Example
///
/// ```rust,ignore
/// use std::time::Duration;
/// use html2pdf_api::{BrowserPool, BrowserPoolConfigBuilder, ChromeBrowserFactory};
///
/// let pool = BrowserPool::builder()
///     .config(
///         BrowserPoolConfigBuilder::new()
///             .max_pool_size(10)
///             .warmup_count(5)
///             .browser_ttl(Duration::from_secs(7200))
///             .build()?
///     )
///     .factory(Box::new(ChromeBrowserFactory::with_defaults()))
///     .enable_keep_alive(true)
///     .build()?;
/// ```
pub struct BrowserPoolBuilder {
    /// Optional configuration; `BrowserPoolConfig::default()` is used when absent.
    config: Option<BrowserPoolConfig>,

    /// Browser factory (required; `build()` fails without one).
    factory: Option<Box<dyn BrowserFactory>>,

    /// Whether to start the keep-alive monitoring thread (default: true).
    enable_keep_alive: bool,
}
1643
1644impl BrowserPoolBuilder {
1645 /// Create a new builder with defaults.
1646 ///
1647 /// # Example
1648 ///
1649 /// ```rust,ignore
1650 /// let builder = BrowserPoolBuilder::new();
1651 /// ```
1652 pub fn new() -> Self {
1653 Self {
1654 config: None,
1655 factory: None,
1656 enable_keep_alive: true,
1657 }
1658 }
1659
1660 /// Set custom configuration.
1661 ///
1662 /// If not called, uses [`BrowserPoolConfig::default()`].
1663 ///
1664 /// # Parameters
1665 ///
1666 /// * `config` - Validated configuration from [`crate::BrowserPoolConfigBuilder`].
1667 ///
1668 /// # Example
1669 ///
1670 /// ```rust,ignore
1671 /// let config = BrowserPoolConfigBuilder::new()
1672 /// .max_pool_size(10)
1673 /// .build()?;
1674 ///
1675 /// let pool = BrowserPool::builder()
1676 /// .config(config)
1677 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1678 /// .build()?;
1679 /// ```
1680 pub fn config(mut self, config: BrowserPoolConfig) -> Self {
1681 self.config = Some(config);
1682 self
1683 }
1684
1685 /// Set browser factory (required).
1686 ///
1687 /// The factory is responsible for creating browser instances.
1688 /// Use [`ChromeBrowserFactory`](crate::ChromeBrowserFactory) for Chrome/Chromium browsers.
1689 ///
1690 /// # Parameters
1691 ///
1692 /// * `factory` - A boxed [`BrowserFactory`] implementation.
1693 ///
1694 /// # Example
1695 ///
1696 /// ```rust,ignore
1697 /// let pool = BrowserPool::builder()
1698 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1699 /// .build()?;
1700 /// ```
1701 pub fn factory(mut self, factory: Box<dyn BrowserFactory>) -> Self {
1702 self.factory = Some(factory);
1703 self
1704 }
1705
1706 /// Enable or disable keep-alive thread.
1707 ///
1708 /// Keep-alive should be disabled only for testing.
1709 /// Production use should always have it enabled.
1710 ///
1711 /// # Parameters
1712 ///
1713 /// * `enable` - Whether to enable the keep-alive thread.
1714 ///
1715 /// # Example
1716 ///
1717 /// ```rust,ignore
1718 /// // Disable for tests
1719 /// let pool = BrowserPool::builder()
1720 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1721 /// .enable_keep_alive(false)
1722 /// .build()?;
1723 /// ```
1724 pub fn enable_keep_alive(mut self, enable: bool) -> Self {
1725 self.enable_keep_alive = enable;
1726 self
1727 }
1728
1729 /// Build the browser pool.
1730 ///
1731 /// # Errors
1732 ///
1733 /// Returns [`BrowserPoolError::Configuration`] if factory is not provided.
1734 ///
1735 /// # Panics
1736 ///
1737 /// Panics if called outside a tokio runtime context.
1738 ///
1739 /// # Example
1740 ///
1741 /// ```rust,ignore
1742 /// let pool = BrowserPool::builder()
1743 /// .factory(Box::new(ChromeBrowserFactory::with_defaults()))
1744 /// .build()?;
1745 /// ```
1746 pub fn build(self) -> Result<BrowserPool> {
1747 let config = self.config.unwrap_or_default();
1748 let factory = self.factory.ok_or_else(|| {
1749 BrowserPoolError::Configuration("No browser factory provided".to_string())
1750 })?;
1751
1752 log::info!("️ Building browser pool with config: {:?}", config);
1753
1754 // Create inner state
1755 let inner = BrowserPoolInner::new(config, factory);
1756
1757 // Start keep-alive thread if enabled
1758 let keep_alive_handle = if self.enable_keep_alive {
1759 log::info!(" Starting keep-alive monitoring thread");
1760 Some(BrowserPool::start_keep_alive(Arc::clone(&inner)))
1761 } else {
1762 log::warn!("⚠️ Keep-alive thread disabled (should only be used for testing)");
1763 None
1764 };
1765
1766 log::info!("✅ Browser pool built successfully");
1767
1768 Ok(BrowserPool {
1769 inner,
1770 keep_alive_handle,
1771 })
1772 }
1773}
1774
impl Default for BrowserPoolBuilder {
    /// Equivalent to [`BrowserPoolBuilder::new`]: no config, no factory,
    /// keep-alive enabled.
    fn default() -> Self {
        Self::new()
    }
}
1780
1781// ============================================================================
1782// Environment Initialization (feature-gated)
1783// ============================================================================
1784
/// Initialize browser pool from environment variables.
///
/// Convenience bootstrap for common setups: reads pool settings from the
/// environment, builds a Chrome-backed pool, warms it up, and returns it
/// wrapped for shared use in web handlers.
///
/// # Feature Flag
///
/// This function is only available when the `env-config` feature is enabled.
///
/// # Environment Variables
///
/// - `BROWSER_POOL_SIZE`: Maximum pool size (default: 5)
/// - `BROWSER_WARMUP_COUNT`: Warmup browser count (default: 3)
/// - `BROWSER_TTL_SECONDS`: Browser TTL in seconds (default: 3600)
/// - `BROWSER_WARMUP_TIMEOUT_SECONDS`: Warmup timeout (default: 60)
/// - `CHROME_PATH`: Custom Chrome binary path (optional)
///
/// # Returns
///
/// `Arc<Mutex<BrowserPool>>` ready for use in web handlers.
///
/// # Errors
///
/// - Returns error if configuration is invalid.
/// - Returns error if warmup fails.
///
/// # Example
///
/// ```rust,ignore
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     env_logger::init();
///
///     let pool = init_browser_pool().await?;
///
///     // Use pool in handlers...
///
///     Ok(())
/// }
/// ```
#[cfg(feature = "env-config")]
pub async fn init_browser_pool() -> Result<Arc<Mutex<BrowserPool>>> {
    use crate::config::env::{chrome_path_from_env, from_env};
    use crate::factory::ChromeBrowserFactory;

    log::info!("Initializing browser pool from environment...");

    // Pull validated configuration from the environment first.
    let config = from_env()?;

    // Optional override for the Chrome binary location.
    let chrome_path = chrome_path_from_env();

    log::info!("Pool configuration from environment:");
    log::info!(" - Max pool size: {}", config.max_pool_size);
    log::info!(" - Warmup count: {}", config.warmup_count);
    log::info!(
        " - Browser TTL: {}s ({}min)",
        config.browser_ttl.as_secs(),
        config.browser_ttl.as_secs() / 60
    );
    log::info!(" - Warmup timeout: {}s", config.warmup_timeout.as_secs());
    log::info!(
        " - Chrome path: {}",
        chrome_path.as_deref().unwrap_or("auto-detect")
    );

    // Pick a factory depending on whether a custom binary path was given.
    let factory: Box<dyn BrowserFactory> = if let Some(path) = chrome_path {
        log::info!("Using custom Chrome path: {}", path);
        Box::new(ChromeBrowserFactory::with_path(path))
    } else {
        log::info!("Using auto-detected Chrome browser");
        Box::new(ChromeBrowserFactory::with_defaults())
    };

    // Assemble the pool itself.
    log::debug!("Building browser pool...");
    let pool = match BrowserPool::builder()
        .config(config.clone())
        .factory(factory)
        .enable_keep_alive(true)
        .build()
    {
        Ok(pool) => pool,
        Err(e) => {
            log::error!("❌ Failed to create browser pool: {}", e);
            return Err(e);
        }
    };

    log::info!("✅ Browser pool created successfully");

    // Pre-create browsers so the first requests don't pay the startup cost.
    log::info!(
        "Warming up browser pool with {} instances...",
        config.warmup_count
    );
    if let Err(e) = pool.warmup().await {
        log::error!("❌ Failed to warmup pool: {}", e);
        return Err(e);
    }

    let stats = pool.stats();
    log::info!(
        "✅ Browser pool ready - Available: {}, Active: {}, Total: {}",
        stats.available,
        stats.active,
        stats.total
    );

    Ok(pool.into_shared())
}
1898
1899// ============================================================================
1900// Unit Tests
1901// ============================================================================
1902
#[cfg(test)]
mod tests {
    use super::*;

    /// Building a pool without a factory must fail with a Configuration error.
    ///
    /// A factory is mandatory because the pool needs to know how to
    /// create browser instances; this pins the error path and its message.
    #[test]
    fn test_pool_builder_missing_factory() {
        // The builder is exercised inside a tokio runtime, matching production usage.
        let rt = tokio::runtime::Runtime::new().unwrap();

        rt.block_on(async {
            let config = crate::config::BrowserPoolConfigBuilder::new()
                .max_pool_size(3)
                .build()
                .unwrap();

            let result = BrowserPool::builder()
                .config(config)
                // Intentionally missing factory
                .build();

            assert!(result.is_err(), "Build should fail without factory");

            if let Err(BrowserPoolError::Configuration(msg)) = result {
                assert!(
                    msg.contains("No browser factory provided"),
                    "Expected factory error, got: {}",
                    msg
                );
            } else {
                panic!("Expected Configuration error for missing factory");
            }
        });
    }

    /// `Default` must match `new()`: empty config/factory, keep-alive on.
    #[test]
    fn test_builder_default() {
        let b: BrowserPoolBuilder = Default::default();
        assert!(b.config.is_none());
        assert!(b.factory.is_none());
        assert!(b.enable_keep_alive);
    }

    /// The keep-alive thread can be switched off via the builder.
    #[test]
    fn test_builder_disable_keep_alive() {
        let b = BrowserPoolBuilder::new().enable_keep_alive(false);
        assert!(!b.enable_keep_alive);
    }
}
1957}