html2pdf_api/service/pdf.rs
1//! Core PDF generation service (framework-agnostic).
2//!
3//! This module contains the core PDF generation logic that is shared across
4//! all web framework integrations. The functions here are **synchronous/blocking**
5//! and should be called from within a blocking context (e.g., `tokio::task::spawn_blocking`,
6//! `actix_web::web::block`, etc.).
7//!
8//! # Architecture
9//!
10//! ```text
11//! ┌─────────────────────────────────────────────────────────────────┐
12//! │ Framework Integration │
13//! │ (Actix-web / Rocket / Axum) │
14//! └─────────────────────────┬───────────────────────────────────────┘
15//! │ async context
16//! ▼
17//! ┌─────────────────────────────────────────────────────────────────┐
18//! │ spawn_blocking / web::block │
19//! └─────────────────────────┬───────────────────────────────────────┘
20//! │ blocking context
21//! ▼
22//! ┌─────────────────────────────────────────────────────────────────┐
23//! │ This Module (pdf.rs) │
24//! │ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │
25//! │ │generate_pdf_ │ │generate_pdf_ │ │get_pool_stats │ │
26//! │ │from_url │ │from_html │ │ │ │
27//! │ └────────┬────────┘ └────────┬────────┘ └─────────────────┘ │
28//! │ │ │ │
29//! │ └──────────┬─────────┘ │
30//! │ ▼ │
31//! │ ┌─────────────────────┐ │
32//! │ │generate_pdf_internal│ │
33//! │ └──────────┬──────────┘ │
34//! └──────────────────────┼──────────────────────────────────────────┘
35//! │
36//! ▼
37//! ┌─────────────────────────────────────────────────────────────────┐
38//! │ BrowserPool │
39//! │ (headless_chrome) │
40//! └─────────────────────────────────────────────────────────────────┘
41//! ```
42//!
43//! # Thread Safety
44//!
//! All functions in this module are designed to be called from multiple threads
//! concurrently. The browser pool synchronizes access with its own internal
//! locks (callers pass a plain `&BrowserPool`, no outer `Mutex` is needed), and
//! each PDF generation operation acquires a browser, uses it, and returns it to
//! the pool automatically via RAII.
49//!
50//! # Blocking Behavior
51//!
52//! **Important:** These functions block the calling thread. In an async context,
53//! always wrap calls in a blocking task:
54//!
55//! ```rust,ignore
56//! // ✅ Correct: Using spawn_blocking
57//! let result = tokio::task::spawn_blocking(move || {
58//! generate_pdf_from_url(&pool, &request)
59//! }).await?;
60//!
61//! // ❌ Wrong: Calling directly in async context
62//! // This will block the async runtime!
63//! let result = generate_pdf_from_url(&pool, &request);
64//! ```
65//!
66//! # Usage Examples
67//!
68//! ## Basic URL to PDF Conversion
69//!
70//! ```rust,ignore
71//! use html2pdf_api::service::{generate_pdf_from_url, PdfFromUrlRequest};
72//!
73//! // Assuming `pool` is a BrowserPool
74//! let request = PdfFromUrlRequest {
75//! url: "https://example.com".to_string(),
76//! ..Default::default()
77//! };
78//!
79//! // In a blocking context:
80//! let response = generate_pdf_from_url(&pool, &request)?;
81//! println!("Generated PDF: {} bytes", response.data.len());
82//! ```
83//!
84//! ## HTML to PDF Conversion
85//!
86//! ```rust,ignore
87//! use html2pdf_api::service::{generate_pdf_from_html, PdfFromHtmlRequest};
88//!
89//! let request = PdfFromHtmlRequest {
90//! html: "<html><body><h1>Hello World</h1></body></html>".to_string(),
91//! filename: Some("hello.pdf".to_string()),
92//! ..Default::default()
93//! };
94//!
95//! let response = generate_pdf_from_html(&pool, &request)?;
96//! std::fs::write("hello.pdf", &response.data)?;
97//! ```
98//!
99//! ## With Async Web Framework
100//!
101//! ```rust,ignore
102//! use actix_web::{web, HttpResponse};
103//! use html2pdf_api::service::{generate_pdf_from_url, PdfFromUrlRequest};
104//!
105//! async fn handler(
106//! pool: web::Data<SharedPool>,
107//! query: web::Query<PdfFromUrlRequest>,
108//! ) -> HttpResponse {
109//! let pool = pool.into_inner();
110//! let request = query.into_inner();
111//!
112//! let result = web::block(move || {
113//! generate_pdf_from_url(&pool, &request)
114//! }).await;
115//!
116//! match result {
117//! Ok(Ok(pdf)) => HttpResponse::Ok()
118//! .content_type("application/pdf")
119//! .body(pdf.data),
120//! Ok(Err(e)) => HttpResponse::BadRequest().body(e.to_string()),
121//! Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
122//! }
123//! }
124//! ```
125//!
126//! # Performance Considerations
127//!
128//! | Operation | Typical Duration | Notes |
129//! |-----------|------------------|-------|
130//! | Pool lock acquisition | < 1ms | Fast, non-blocking |
131//! | Browser checkout | < 1ms | If browser available |
132//! | Browser creation | 500ms - 2s | If pool needs to create new browser |
133//! | Page navigation | 100ms - 10s | Depends on target page |
134//! | JavaScript wait | 0 - 15s | Configurable via `waitsecs` |
135//! | PDF generation | 100ms - 5s | Depends on page complexity |
136//! | Tab cleanup | < 100ms | Best effort, non-blocking |
137//!
138//! # Error Handling
139//!
140//! All functions return `Result<T, PdfServiceError>`. Errors are categorized
141//! and include appropriate HTTP status codes. See [`PdfServiceError`] for
142//! the complete error taxonomy.
143//!
144//! [`PdfServiceError`]: crate::service::PdfServiceError
145
146use headless_chrome::types::PrintToPdfOptions;
147use std::time::{Duration, Instant};
148
149use crate::handle::BrowserHandle;
150use crate::pool::BrowserPool;
151use crate::service::types::*;
152
153// ============================================================================
154// Constants
155// ============================================================================
156
/// Time budget, in seconds, for one complete PDF generation request.
///
/// The budget is intended to span every stage of a request:
/// - checking a browser out of the pool
/// - navigating to the page
/// - waiting for JavaScript to settle
/// - rendering the PDF
/// - closing the tab
///
/// When a request runs longer than this, the caller is expected to surface a
/// [`PdfServiceError::Timeout`] error.
///
/// # Default Value
///
/// `60` seconds, which is enough for most web pages, including pages with
/// heavy JavaScript and external resources.
///
/// # Customization
///
/// Framework integrations use this constant for their timeout wrappers.
/// To apply a different limit, wrap the service functions in your own timeout:
///
/// ```rust,ignore
/// use std::time::Duration;
/// use tokio::time::timeout;
///
/// let custom_timeout = Duration::from_secs(120); // 2 minutes
///
/// let result = timeout(custom_timeout, async {
///     tokio::task::spawn_blocking(move || {
///         generate_pdf_from_url(&pool, &request)
///     }).await
/// }).await;
/// ```
pub const DEFAULT_TIMEOUT_SECS: u64 = 60;
192
/// Fallback JavaScript settle time, in seconds, used when a request does not
/// specify its own wait.
///
/// Once navigation has finished, the service gives the page time to render
/// dynamic content before printing it.
///
/// # Behavior
///
/// While waiting, the service polls every 200ms for `window.isPageDone === true`.
/// A page that sets this flag is printed immediately; a page that never sets it
/// is printed after the full wait has elapsed.
///
/// # Default Value
///
/// `5` seconds, a compromise between giving JavaScript time to run and not
/// stalling on simple pages.
///
/// # Recommendations
///
/// | Page Type | Recommended Wait |
/// |-----------|------------------|
/// | Static HTML | 1-2 seconds |
/// | Light JavaScript (vanilla JS, jQuery) | 3-5 seconds |
/// | Heavy SPA (React, Vue, Angular) | 5-10 seconds |
/// | Complex visualizations (D3, charts) | 10-15 seconds |
/// | Real-time data loading | 10-20 seconds |
pub const DEFAULT_WAIT_SECS: u64 = 5;
220
/// Gap, in milliseconds, between successive JavaScript completion checks.
///
/// While the service waits for a page to finish rendering, it looks for
/// `window.isPageDone === true` once per interval.
///
/// # Trade-offs
///
/// - **Shorter interval**: reacts sooner, at the cost of more CPU spent polling
/// - **Longer interval**: cheaper, but may notice the ready state later than it occurred
///
/// # Default Value
///
/// `200` milliseconds, which is responsive without polling excessively.
const JS_POLL_INTERVAL_MS: u64 = 200;
235
236// ============================================================================
237// Public API - Core PDF Generation Functions
238// ============================================================================
239
240/// Generate a PDF from a URL.
241///
242/// Navigates to the specified URL using a browser from the pool, waits for
243/// JavaScript execution, and generates a PDF of the rendered page.
244///
245/// # Thread Safety
246///
247/// This function is thread-safe and can be called concurrently from multiple
248/// threads. The browser pool mutex ensures safe access to shared resources.
249///
250/// # Blocking Behavior
251///
252/// **This function blocks the calling thread.** In async contexts, wrap it
253/// in `tokio::task::spawn_blocking`, `actix_web::web::block`, or similar.
254///
255/// # Arguments
256///
/// * `pool` - Reference to the browser pool. The pool uses fine-grained internal locks;
///   browser checkout is fast (~1ms) and concurrent.
258/// * `request` - PDF generation parameters. See [`PdfFromUrlRequest`] for details.
259///
260/// # Returns
261///
262/// * `Ok(PdfResponse)` - Successfully generated PDF with binary data and metadata
263/// * `Err(PdfServiceError)` - Error with details about what went wrong
264///
265/// # Errors
266///
267/// | Error | Cause | Resolution |
268/// |-------|-------|------------|
269/// | [`InvalidUrl`] | URL is empty or malformed | Provide valid HTTP/HTTPS URL |
270/// | [`BrowserUnavailable`] | Pool exhausted | Retry or increase pool size |
271/// | [`TabCreationFailed`] | Browser issue | Automatic recovery |
272/// | [`NavigationFailed`] | URL unreachable | Check URL accessibility |
273/// | [`NavigationTimeout`] | Page too slow | Increase timeout or optimize page |
274/// | [`PdfGenerationFailed`] | Rendering issue | Simplify page or check content |
275///
276/// [`InvalidUrl`]: PdfServiceError::InvalidUrl
277/// [`BrowserUnavailable`]: PdfServiceError::BrowserUnavailable
278/// [`TabCreationFailed`]: PdfServiceError::TabCreationFailed
279/// [`NavigationFailed`]: PdfServiceError::NavigationFailed
280/// [`NavigationTimeout`]: PdfServiceError::NavigationTimeout
281/// [`PdfGenerationFailed`]: PdfServiceError::PdfGenerationFailed
282///
283/// # Examples
284///
285/// ## Basic Usage
286///
287/// ```rust,ignore
288/// use html2pdf_api::service::{generate_pdf_from_url, PdfFromUrlRequest};
289///
290/// let request = PdfFromUrlRequest {
291/// url: "https://example.com".to_string(),
292/// ..Default::default()
293/// };
294///
295/// let response = generate_pdf_from_url(&pool, &request)?;
296/// assert!(response.data.starts_with(b"%PDF-")); // Valid PDF header
297/// ```
298///
299/// ## With Custom Options
300///
301/// ```rust,ignore
302/// let request = PdfFromUrlRequest {
303/// url: "https://example.com/report".to_string(),
304/// filename: Some("quarterly-report.pdf".to_string()),
305/// landscape: Some(true), // Wide tables
306/// waitsecs: Some(10), // Complex charts
307/// download: Some(true), // Force download
///     print_background: Some(true),
///     ..Default::default()
309/// };
310///
311/// let response = generate_pdf_from_url(&pool, &request)?;
312/// println!("Generated {} with {} bytes", response.filename, response.size());
313/// ```
314///
315/// ## Error Handling
316///
317/// ```rust,ignore
318/// match generate_pdf_from_url(&pool, &request) {
319/// Ok(pdf) => {
320/// // Success - use pdf.data
321/// }
322/// Err(PdfServiceError::InvalidUrl(msg)) => {
323/// // Client error - return 400
324/// eprintln!("Bad URL: {}", msg);
325/// }
326/// Err(PdfServiceError::BrowserUnavailable(_)) => {
327/// // Transient error - retry
328/// std::thread::sleep(Duration::from_secs(1));
329/// }
330/// Err(e) => {
331/// // Other error
332/// eprintln!("PDF generation failed: {}", e);
333/// }
334/// }
335/// ```
336///
337/// # Performance
338///
339/// Typical execution time breakdown for a moderately complex page:
340///
341/// ```text
342/// ┌────────────────────────────────────────────────────────────────┐
343/// │ Browser checkout ~1ms │
344/// │ ├─────────────────────────────────────────────────────────────┤
345/// │ Tab creation ~50ms │
346/// │ ├─────────────────────────────────────────────────────────────┤
347/// │ Navigation + page load ~500ms │
348/// │ ├─────────────────────────────────────────────────────────────┤
349/// │ JavaScript wait (configurable) ~5000ms │
350/// │ ├─────────────────────────────────────────────────────────────┤
351/// │ PDF rendering ~200ms │
352/// │ ├─────────────────────────────────────────────────────────────┤
353/// │ Tab cleanup ~50ms │
354/// └────────────────────────────────────────────────────────────────┘
355/// Total: ~5.8 seconds (dominated by JS wait)
356/// ```
357pub fn generate_pdf_from_url(
358 pool: &BrowserPool,
359 request: &PdfFromUrlRequest,
360) -> Result<PdfResponse, PdfServiceError> {
361 // Validate URL before acquiring browser
362 let url = validate_url(&request.url)?;
363
364 log::debug!(
365 "Generating PDF from URL: {} (landscape={}, wait={}s)",
366 url,
367 request.is_landscape(),
368 request.wait_duration().as_secs()
369 );
370
371 // Acquire browser from pool (lock held briefly)
372 let browser = acquire_browser(pool)?;
373
374 let print_options = build_print_options(
375 request.landscape,
376 request.display_header_footer,
377 request.print_background,
378 request.scale,
379 request.paper_width,
380 request.paper_height,
381 request.margin_top,
382 request.margin_bottom,
383 request.margin_left,
384 request.margin_right,
385 request.page_ranges.clone(),
386 request.header_template.clone(),
387 request.footer_template.clone(),
388 request.prefer_css_page_size,
389 );
390
391 // Generate PDF (lock released, browser returned via RAII on completion/error)
392 let pdf_data = generate_pdf_internal(
393 &browser,
394 &url,
395 request.wait_duration(),
396 print_options,
397 false, // offline_mode is disabled for URLs
398 )?;
399
400 log::info!(
401 "✅ PDF generated successfully from URL: {} ({} bytes)",
402 url,
403 pdf_data.len()
404 );
405
406 Ok(PdfResponse::new(
407 pdf_data,
408 request.filename_or_default(),
409 request.is_download(),
410 ))
411}
412
413/// Generate a PDF from HTML content.
414///
415/// Loads the provided HTML content into a browser tab using a data URL,
416/// waits for any JavaScript execution, and generates a PDF.
417///
418/// # Thread Safety
419///
420/// This function is thread-safe and can be called concurrently from multiple
421/// threads. See [`generate_pdf_from_url`] for details.
422///
423/// # Blocking Behavior
424///
425/// **This function blocks the calling thread.** See [`generate_pdf_from_url`]
426/// for guidance on async usage.
427///
428/// # How It Works
429///
430/// The HTML content is converted to a data URL:
431///
432/// ```text
433/// data:text/html;charset=utf-8,<encoded-html-content>
434/// ```
435///
436/// This allows loading HTML directly without a web server. The browser
437/// renders the HTML as if it were loaded from a regular URL.
438///
439/// # Arguments
440///
/// * `pool` - Reference to the browser pool (synchronized internally; no outer lock is required)
442/// * `request` - HTML content and generation parameters. See [`PdfFromHtmlRequest`].
443///
444/// # Returns
445///
446/// * `Ok(PdfResponse)` - Successfully generated PDF
447/// * `Err(PdfServiceError)` - Error details
448///
449/// # Errors
450///
451/// | Error | Cause | Resolution |
452/// |-------|-------|------------|
453/// | [`EmptyHtml`] | HTML content is empty/whitespace | Provide HTML content |
454/// | [`BrowserUnavailable`] | Pool exhausted | Retry or increase pool size |
455/// | [`NavigationFailed`] | HTML parsing issue | Check HTML validity |
456/// | [`PdfGenerationFailed`] | Rendering issue | Simplify HTML |
457///
458/// [`EmptyHtml`]: PdfServiceError::EmptyHtml
459/// [`BrowserUnavailable`]: PdfServiceError::BrowserUnavailable
460/// [`NavigationFailed`]: PdfServiceError::NavigationFailed
461/// [`PdfGenerationFailed`]: PdfServiceError::PdfGenerationFailed
462///
463/// # Limitations
464///
465/// ## External Resources
466///
467/// Since HTML is loaded via data URL, relative URLs don't work:
468///
469/// ```html
470/// <!-- ❌ Won't work - relative URL -->
471/// <img src="/images/logo.png">
472///
473/// <!-- ✅ Works - absolute URL -->
474/// <img src="https://example.com/images/logo.png">
475///
476/// <!-- ✅ Works - inline base64 -->
477/// <img src="data:image/png;base64,iVBORw0KGgo...">
478/// ```
479///
480/// ## Size Limits
481///
482/// Data URLs have browser-specific size limits. For very large HTML documents
483/// (> 1MB), consider:
484/// - Hosting the HTML on a temporary server
485/// - Using [`generate_pdf_from_url`] instead
486/// - Splitting into multiple PDFs
487///
488/// # Examples
489///
490/// ## Simple HTML
491///
492/// ```rust,ignore
493/// use html2pdf_api::service::{generate_pdf_from_html, PdfFromHtmlRequest};
494///
495/// let request = PdfFromHtmlRequest {
496/// html: "<h1>Hello World</h1><p>This is a test.</p>".to_string(),
497/// ..Default::default()
498/// };
499///
500/// let response = generate_pdf_from_html(&pool, &request)?;
501/// std::fs::write("output.pdf", &response.data)?;
502/// ```
503///
504/// ## Complete Document with Styling
505///
506/// ```rust,ignore
507/// let html = r#"
508/// <!DOCTYPE html>
509/// <html>
510/// <head>
511/// <meta charset="UTF-8">
512/// <style>
513/// body {
514/// font-family: 'Arial', sans-serif;
515/// margin: 40px;
516/// color: #333;
517/// }
518/// h1 {
519/// color: #0066cc;
520/// border-bottom: 2px solid #0066cc;
521/// padding-bottom: 10px;
522/// }
523/// table {
524/// width: 100%;
525/// border-collapse: collapse;
526/// margin-top: 20px;
527/// }
528/// th, td {
529/// border: 1px solid #ddd;
530/// padding: 12px;
531/// text-align: left;
532/// }
533/// th {
534/// background-color: #f5f5f5;
535/// }
536/// </style>
537/// </head>
538/// <body>
539/// <h1>Monthly Report</h1>
540/// <p>Generated on: 2024-01-15</p>
541/// <table>
542/// <tr><th>Metric</th><th>Value</th></tr>
543/// <tr><td>Revenue</td><td>$50,000</td></tr>
544/// <tr><td>Users</td><td>1,234</td></tr>
545/// </table>
546/// </body>
547/// </html>
548/// "#;
549///
550/// let request = PdfFromHtmlRequest {
551/// html: html.to_string(),
552/// filename: Some("monthly-report.pdf".to_string()),
553/// print_background: Some(true), // Include styled backgrounds
554/// ..Default::default()
555/// };
556///
557/// let response = generate_pdf_from_html(&pool, &request)?;
558/// ```
559///
560/// ## With Embedded Images
561///
562/// ```rust,ignore
563/// // Base64 encode an image
564/// let image_base64 = base64::encode(std::fs::read("logo.png")?);
565///
566/// let html = format!(r#"
567/// <!DOCTYPE html>
568/// <html>
569/// <body>
570/// <img src="data:image/png;base64,{}" alt="Logo">
571/// <h1>Company Report</h1>
572/// </body>
573/// </html>
574/// "#, image_base64);
575///
576/// let request = PdfFromHtmlRequest {
577/// html,
578/// ..Default::default()
579/// };
580///
581/// let response = generate_pdf_from_html(&pool, &request)?;
582/// ```
583pub fn generate_pdf_from_html(
584 pool: &BrowserPool,
585 request: &PdfFromHtmlRequest,
586) -> Result<PdfResponse, PdfServiceError> {
587 // Validate HTML content
588 if request.html.trim().is_empty() {
589 log::warn!("Empty HTML content provided");
590 return Err(PdfServiceError::EmptyHtml);
591 }
592
593 log::debug!(
594 "Generating PDF from HTML ({} bytes, landscape={}, wait={}s)",
595 request.html.len(),
596 request.is_landscape(),
597 request.wait_duration().as_secs()
598 );
599
600 // Acquire browser from pool
601 let browser = acquire_browser(pool)?;
602
603 // Convert HTML to data URL
604 // Using percent-encoding to handle special characters
605 let data_url = format!(
606 "data:text/html;charset=utf-8,{}",
607 urlencoding::encode(&request.html)
608 );
609
610 log::trace!("Data URL length: {} bytes", data_url.len());
611
612 let print_options = build_print_options(
613 request.landscape,
614 request.display_header_footer,
615 request.print_background,
616 request.scale,
617 request.paper_width,
618 request.paper_height,
619 request.margin_top,
620 request.margin_bottom,
621 request.margin_left,
622 request.margin_right,
623 request.page_ranges.clone(),
624 request.header_template.clone(),
625 request.footer_template.clone(),
626 request.prefer_css_page_size,
627 );
628
629 // Generate PDF
630 let pdf_data = generate_pdf_internal(
631 &browser,
632 &data_url,
633 request.wait_duration(),
634 print_options,
635 request.offline_mode.unwrap_or(false),
636 )?;
637
638 log::info!(
639 "✅ PDF generated successfully from HTML ({} bytes input → {} bytes output)",
640 request.html.len(),
641 pdf_data.len()
642 );
643
644 Ok(PdfResponse::new(
645 pdf_data,
646 request.filename_or_default(),
647 request.is_download(),
648 ))
649}
650
651/// Get current browser pool statistics.
652///
653/// Returns real-time metrics about the browser pool state including
654/// available browsers, active browsers, and total count.
655///
656/// # Thread Safety
657///
658/// This function briefly acquires the pool lock to read statistics.
659/// It's safe to call frequently for monitoring purposes.
660///
661/// # Blocking Behavior
662///
663/// This function is fast (< 1ms) as it reads from the pool's internal
664/// state. Safe to call frequently from health check endpoints.
665///
666/// # Arguments
667///
668/// * `pool` - Reference to the browser pool
669///
670/// # Returns
671///
672/// * `Ok(PoolStatsResponse)` - Current pool statistics
673///
674/// # Examples
675///
676/// ## Basic Usage
677///
678/// ```rust,ignore
679/// use html2pdf_api::service::get_pool_stats;
680///
681/// let stats = get_pool_stats(&pool)?;
682/// println!("Available: {}", stats.available);
683/// println!("Active: {}", stats.active);
684/// println!("Total: {}", stats.total);
685/// ```
686///
687/// ## Monitoring Integration
688///
689/// ```rust,ignore
690/// use prometheus::{Gauge, register_gauge};
691///
692/// lazy_static! {
693/// static ref POOL_AVAILABLE: Gauge = register_gauge!(
694/// "browser_pool_available",
695/// "Number of available browsers in pool"
696/// ).unwrap();
697/// static ref POOL_ACTIVE: Gauge = register_gauge!(
698/// "browser_pool_active",
699/// "Number of active browsers in pool"
700/// ).unwrap();
701/// }
702///
/// fn update_metrics(pool: &BrowserPool) {
704/// if let Ok(stats) = get_pool_stats(pool) {
705/// POOL_AVAILABLE.set(stats.available as f64);
706/// POOL_ACTIVE.set(stats.active as f64);
707/// }
708/// }
709/// ```
710///
711/// ## Capacity Check
712///
713/// ```rust,ignore
714/// let stats = get_pool_stats(&pool)?;
715///
716/// if stats.available == 0 {
717/// log::warn!("No browsers available, requests may be delayed");
718/// }
719///
720/// let utilization = stats.active as f64 / stats.total.max(1) as f64;
721/// if utilization > 0.8 {
722/// log::warn!("Pool utilization at {:.0}%, consider scaling", utilization * 100.0);
723/// }
724/// ```
725pub fn get_pool_stats(pool: &BrowserPool) -> Result<PoolStatsResponse, PdfServiceError> {
726 let stats = pool.stats();
727
728 Ok(PoolStatsResponse {
729 available: stats.available,
730 active: stats.active,
731 total: stats.total,
732 })
733}
734
735/// Check if the browser pool is ready to handle requests.
736///
737/// Returns `true` if the pool has available browsers or capacity to create
738/// new ones. This is useful for readiness probes in container orchestration.
739///
740/// # Readiness Criteria
741///
742/// The pool is considered "ready" if either:
743/// - There are idle browsers available (`available > 0`), OR
744/// - There is capacity to create new browsers (`active < max_pool_size`)
745///
746/// The pool is "not ready" only when:
747/// - All browsers are in use AND the pool is at maximum capacity
748///
749/// # Arguments
750///
751/// * `pool` - Reference to the browser pool
752///
753/// # Returns
754///
755/// * `Ok(true)` - Pool can accept new requests
756/// * `Ok(false)` - Pool is at capacity, requests will queue
757///
758/// # Use Cases
759///
760/// ## Kubernetes Readiness Probe
761///
762/// ```yaml
763/// readinessProbe:
764/// httpGet:
765/// path: /ready
766/// port: 8080
767/// initialDelaySeconds: 5
768/// periodSeconds: 10
769/// ```
770///
771/// ## Load Balancer Health Check
772///
773/// When `is_pool_ready` returns `false`, the endpoint should return
774/// HTTP 503 Service Unavailable to remove the instance from rotation.
775///
776/// # Examples
777///
778/// ## Basic Check
779///
780/// ```rust,ignore
781/// use html2pdf_api::service::is_pool_ready;
782///
783/// if is_pool_ready(&pool)? {
784/// println!("Pool is ready to accept requests");
785/// } else {
786/// println!("Pool is at capacity");
787/// }
788/// ```
789///
790/// ## Request Gating
791///
792/// ```rust,ignore
/// async fn handle_request(pool: &BrowserPool, request: PdfFromUrlRequest) -> Result<PdfResponse, Error> {
794/// // Quick capacity check before expensive operation
795/// if !is_pool_ready(pool)? {
796/// return Err(Error::ServiceUnavailable("Pool at capacity, try again later"));
797/// }
798///
799/// // Proceed with PDF generation
800/// generate_pdf_from_url(pool, &request)
801/// }
802/// ```
803pub fn is_pool_ready(pool: &BrowserPool) -> Result<bool, PdfServiceError> {
804 let stats = pool.stats();
805 let config = pool.config();
806
807 // Ready if we have available browsers OR we can create more
808 let is_ready = stats.available > 0 || stats.active < config.max_pool_size;
809
810 log::trace!(
811 "Pool readiness check: available={}, active={}, max={}, ready={}",
812 stats.available,
813 stats.active,
814 config.max_pool_size,
815 is_ready
816 );
817
818 Ok(is_ready)
819}
820
821// ============================================================================
822// Internal Helper Functions
823// ============================================================================
824
825/// Validate and normalize a URL string.
826///
827/// Parses the URL using the `url` crate and returns the normalized form.
828/// This catches malformed URLs early, before acquiring a browser.
829///
830/// # Validation Rules
831///
832/// - URL must not be empty
833/// - URL must be parseable by the `url` crate
834/// - Scheme must be present (http/https/file/data)
835///
836/// # Arguments
837///
838/// * `url` - The URL string to validate
839///
840/// # Returns
841///
842/// * `Ok(String)` - The normalized URL
843/// * `Err(PdfServiceError::InvalidUrl)` - If validation fails
844///
845/// # Examples
846///
847/// ```rust,ignore
848/// assert!(validate_url("https://example.com").is_ok());
849/// assert!(validate_url("").is_err());
850/// assert!(validate_url("not-a-url").is_err());
851/// ```
852fn validate_url(url: &str) -> Result<String, PdfServiceError> {
853 // Check for empty URL first (better error message)
854 if url.trim().is_empty() {
855 log::debug!("URL validation failed: empty URL");
856 return Err(PdfServiceError::InvalidUrl("URL is required".to_string()));
857 }
858
859 // Parse and normalize the URL
860 match url::Url::parse(url) {
861 Ok(parsed) => {
862 let scheme = parsed.scheme();
863 if scheme != "http" && scheme != "https" && scheme != "data" {
864 log::debug!("URL validation failed: unsupported scheme '{}'", scheme);
865 return Err(PdfServiceError::InvalidUrl(format!(
866 "Unsupported URL scheme '{}'. Only http, https, and data are allowed",
867 scheme
868 )));
869 }
870 log::trace!("URL validated successfully: {}", parsed);
871 Ok(parsed.to_string())
872 }
873 Err(e) => {
874 log::debug!("URL validation failed for '{}': {}", url, e);
875 Err(PdfServiceError::InvalidUrl(e.to_string()))
876 }
877 }
878}
879
880/// Acquire a browser from the pool.
881///
882/// Locks the pool mutex, retrieves a browser, and returns it. The lock is
883/// released immediately after checkout, not held during PDF generation.
884///
885/// # Browser Lifecycle
886///
887/// The returned `BrowserHandle` uses RAII to automatically return the
888/// browser to the pool when dropped:
889///
890/// ```text
891/// ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
892/// │ acquire_browser │ ──▶ │ BrowserHandle │ ──▶ │ PDF Generation │
893/// │ (lock, get) │ │ (RAII guard) │ │ (uses browser) │
894/// └─────────────────┘ └─────────────────┘ └────────┬────────┘
895/// │
896/// ▼
897/// ┌─────────────────┐ ┌─────────────────┐
898/// │ Back to Pool │ ◀── │ Drop Handle │
899/// │ (automatic) │ │ (RAII cleanup) │
900/// └─────────────────┘ └─────────────────┘
901/// ```
902///
903/// # Arguments
904///
905/// * `pool` - Reference to the mutex-wrapped browser pool
906///
907/// # Returns
908///
909/// * `Ok(BrowserHandle)` - A browser ready for use
910/// * `Err(PdfServiceError)` - If pool lock or browser acquisition fails
911fn acquire_browser(pool: &BrowserPool) -> Result<BrowserHandle, PdfServiceError> {
912 // Get a browser from the pool (no outer lock needed — pool uses internal locks)
913 let browser = pool.get().map_err(|e| {
914 log::error!("❌ Failed to get browser from pool: {}", e);
915 PdfServiceError::BrowserUnavailable(e.to_string())
916 })?;
917
918 log::debug!("Acquired browser {} from pool", browser.id());
919
920 Ok(browser)
921}
922
923/// Core PDF generation logic.
924///
925/// This function performs the actual work of:
926/// 1. Creating a new browser tab
927/// 2. Navigating to the URL
928/// 3. Waiting for JavaScript completion
929/// 4. Generating the PDF
930/// 5. Cleaning up the tab
931///
932/// # Arguments
933///
934/// * `browser` - Browser handle from the pool
935/// * `url` - URL to navigate to (can be http/https or data: URL)
936/// * `wait_duration` - How long to wait for JavaScript
937/// * `landscape` - Whether to use landscape orientation
938/// * `print_background` - Whether to include background graphics
939///
940/// # Returns
941///
942/// * `Ok(Vec<u8>)` - The raw PDF binary data
943/// * `Err(PdfServiceError)` - If any step fails
944///
945/// # Tab Lifecycle
946///
947/// A new tab is created for each PDF generation and closed afterward.
948/// This ensures clean state and prevents memory leaks from accumulating
949/// page resources.
950///
951/// ```text
952/// Browser Instance
953/// ├── Tab 1 (new) ◀── Created for this request
954/// │ ├── Navigate to URL
955/// │ ├── Wait for JS
956/// │ ├── Generate PDF
957/// │ └── Close tab ◀── Cleanup
958/// └── (available for next request)
959/// ```
960fn generate_pdf_internal(
961 browser: &BrowserHandle,
962 url: &str,
963 wait_duration: Duration,
964 print_options: Option<PrintToPdfOptions>,
965 offline_mode: bool,
966) -> Result<Vec<u8>, PdfServiceError> {
967 let start_time = Instant::now();
968
969 // Create new tab
970 log::trace!("Creating new browser tab");
971 let tab = browser.new_tab().map_err(|e| {
972 log::error!("❌ Failed to create tab: {}", e);
973 browser.mark_unhealthy(); // Poison pill prevention
974 PdfServiceError::TabCreationFailed(e.to_string())
975 })?;
976
977 if offline_mode {
978 log::info!("🛡️ Enabling CDP Offline Mode (SSRF Defense)");
979 let _ = tab
980 .call_method(
981 headless_chrome::protocol::cdp::Network::EmulateNetworkConditions {
982 offline: true,
983 latency: 0.0,
984 download_throughput: 0.0,
985 upload_throughput: 0.0,
986 connection_Type: None,
987 packet_loss: None,
988 packet_queue_length: None,
989 packet_reordering: None,
990 },
991 )
992 .map_err(|e| log::warn!("Failed to apply offline mode: {}", e));
993 }
994
995 // Navigate to URL
996 log::trace!("Navigating to URL: {}", truncate_url(url, 100));
997 let nav_start = Instant::now();
998
999 let page = tab
1000 .navigate_to(url)
1001 .map_err(|e| {
1002 log::error!("❌ Failed to navigate to URL: {}", e);
1003 browser.mark_unhealthy();
1004 PdfServiceError::NavigationFailed(e.to_string())
1005 })?
1006 .wait_until_navigated()
1007 .map_err(|e| {
1008 log::error!("❌ Navigation timeout: {}", e);
1009 browser.mark_unhealthy();
1010 PdfServiceError::NavigationTimeout(e.to_string())
1011 })?;
1012
1013 log::debug!("Navigation completed in {:?}", nav_start.elapsed());
1014
1015 // Wait for JavaScript execution
1016 wait_for_page_ready(&tab, wait_duration);
1017
1018 // Generate PDF
1019 log::trace!("Generating PDF");
1020 let pdf_start = Instant::now();
1021
1022 let pdf_data = page.print_to_pdf(print_options).map_err(|e| {
1023 log::error!("❌ Failed to generate PDF: {}", e);
1024 PdfServiceError::PdfGenerationFailed(e.to_string())
1025 })?;
1026
1027 log::debug!(
1028 "PDF generated in {:?} ({} bytes)",
1029 pdf_start.elapsed(),
1030 pdf_data.len()
1031 );
1032
1033 // Close tab (best effort - don't fail if this doesn't work)
1034 close_tab_safely(&tab);
1035
1036 log::debug!("Total PDF generation time: {:?}", start_time.elapsed());
1037
1038 Ok(pdf_data)
1039}
1040
1041/// Build PDF print options.
1042///
1043/// Creates the `PrintToPdfOptions` struct with the specified settings
1044/// and sensible defaults for margins and other options.
1045///
1046/// # Default Settings
1047///
1048/// - **Margins**: All set to 0 (full page)
1049/// - **Header/Footer**: Disabled
1050/// - **Background**: Configurable (default: true)
1051/// - **Scale**: 1.0 (100%)
1052#[allow(clippy::too_many_arguments)]
1053fn build_print_options(
1054 landscape: Option<bool>,
1055 display_header_footer: Option<bool>,
1056 print_background: Option<bool>,
1057 scale: Option<f64>,
1058 paper_width: Option<f64>,
1059 paper_height: Option<f64>,
1060 margin_top: Option<f64>,
1061 margin_bottom: Option<f64>,
1062 margin_left: Option<f64>,
1063 margin_right: Option<f64>,
1064 page_ranges: Option<String>,
1065 header_template: Option<String>,
1066 footer_template: Option<String>,
1067 prefer_css_page_size: Option<bool>,
1068) -> Option<PrintToPdfOptions> {
1069 Some(PrintToPdfOptions {
1070 landscape,
1071 display_header_footer,
1072 print_background,
1073 scale,
1074 paper_width,
1075 paper_height,
1076 margin_top,
1077 margin_bottom,
1078 margin_left,
1079 margin_right,
1080 page_ranges,
1081 header_template,
1082 footer_template,
1083 prefer_css_page_size,
1084 ..Default::default()
1085 })
1086}
1087
1088/// Wait for the page to signal it's ready for PDF generation.
1089///
1090/// This function implements a polling loop that checks for `window.isPageDone === true`.
1091/// This allows JavaScript-heavy pages to signal when they've finished rendering,
1092/// enabling early PDF generation without waiting the full timeout.
1093///
1094/// # Behavior Summary
1095///
1096/// | Page State | Result |
1097/// |------------|--------|
1098/// | `window.isPageDone = true` | Returns **immediately** (early exit) |
1099/// | `window.isPageDone = false` | Waits **full duration** |
1100/// | `window.isPageDone` not defined | Waits **full duration** |
1101/// | JavaScript error during check | Waits **full duration** |
1102///
1103/// # Default Behavior (No Flag Set)
1104///
1105/// **Important:** If the page does not set `window.isPageDone = true`, this function
1106/// waits the **full `max_wait` duration** before returning. This is intentional -
1107/// it gives JavaScript-heavy pages time to render even without explicit signaling.
1108///
1109/// For example, with the default `waitsecs = 5`:
1110/// - A page **with** the flag set immediately: ~0ms wait
1111/// - A page **without** the flag: full 5000ms wait
1112///
1113/// # How It Works
1114///
1115/// ```text
1116/// ┌─────────────────────────────────────────────────────────────────┐
1117/// │ wait_for_page_ready │
1118/// │ │
1119/// │ ┌─────────┐ ┌──────────────┐ ┌─────────────────────┐ │
1120/// │ │ Start │────▶│ Check flag │────▶│ window.isPageDone? │ │
1121/// │ └─────────┘ └──────────────┘ └──────────┬──────────┘ │
1122/// │ │ │
1123/// │ ┌────────────────────────┼─────────┐ │
1124/// │ │ │ │ │
1125/// │ ▼ ▼ │ │
1126/// │ ┌────────────┐ ┌───────────┐ │ │
1127/// │ │ true │ │ false / │ │ │
1128/// │ │ (ready!) │ │ undefined │ │ │
1129/// │ └─────┬──────┘ └─────┬─────┘ │ │
1130/// │ │ │ │ │
1131/// │ ▼ ▼ │ │
1132/// │ ┌───────────┐ ┌───────────┐ │ │
1133/// │ │ Return │ │ Sleep │ │ │
1134/// │ │ early │ │ 200ms │─────┘ │
1135/// │ └───────────┘ └───────────┘ │
1136/// │ │ │
1137/// │ ▼ │
1138/// │ ┌───────────┐ │
1139/// │ │ Timeout? │ │
1140/// │ └─────┬─────┘ │
1141/// │ │ │
1142/// │ ┌────────────┴────────────┐ │
1143/// │ ▼ ▼ │
1144/// │ ┌───────────┐ ┌──────┐ │
1145/// │ │ Yes │ │ No │ │
1146/// │ │ (proceed) │ │(loop)│ │
1147/// │ └───────────┘ └──────┘ │
1148/// └─────────────────────────────────────────────────────────────────┘
1149/// ```
1150///
1151/// # Polling Timeline
1152///
1153/// The function polls every 200ms (see `JS_POLL_INTERVAL_MS`):
1154///
1155/// ```text
1156/// Time: 0ms 200ms 400ms 600ms 800ms ... 5000ms
1157/// │ │ │ │ │ │
1158/// ▼ ▼ ▼ ▼ ▼ ▼
1159/// Poll Poll Poll Poll Poll ... Timeout
1160/// │ │ │ │ │ │
1161/// └───────┴───────┴───────┴───────┴───────────┤
1162/// ▼
1163/// Proceed to PDF
1164///
1165/// If window.isPageDone = true at any poll → Exit immediately
1166/// ```
1167///
1168/// Each poll executes this JavaScript:
1169///
1170/// ```javascript
/// window.isPageDone === true  // Strict comparison: evaluates to true or false, never undefined
1172/// ```
1173///
1174/// - `true` → Function returns immediately
1175/// - `false` / `undefined` / error → Continue polling until timeout
1176///
1177/// # Page-Side Implementation (Optional)
1178///
1179/// To enable early completion and avoid unnecessary waiting, add this to your
1180/// page's JavaScript **after** all content is rendered:
1181///
1182/// ```javascript
1183/// // Signal that the page is ready for PDF generation
1184/// window.isPageDone = true;
1185/// ```
1186///
1187/// ## Framework Examples
1188///
1189/// **React:**
1190/// ```javascript
1191/// useEffect(() => {
1192/// fetchData().then((result) => {
1193/// setData(result);
1194/// // Signal ready after state update and re-render
1195/// setTimeout(() => { window.isPageDone = true; }, 0);
1196/// });
1197/// }, []);
1198/// ```
1199///
1200/// **Vue:**
1201/// ```javascript
1202/// mounted() {
1203/// this.loadData().then(() => {
1204/// this.$nextTick(() => {
1205/// window.isPageDone = true;
1206/// });
1207/// });
1208/// }
1209/// ```
1210///
1211/// **Vanilla JavaScript:**
1212/// ```javascript
1213/// document.addEventListener('DOMContentLoaded', async () => {
1214/// await loadDynamicContent();
1215/// await renderCharts();
1216/// window.isPageDone = true; // All done!
1217/// });
1218/// ```
1219///
1220/// # When to Increase `waitsecs`
1221///
1222/// If you cannot modify the target page to set `window.isPageDone`, increase
1223/// `waitsecs` based on the page complexity:
1224///
1225/// | Page Type | Recommended `waitsecs` |
1226/// |-----------|------------------------|
1227/// | Static HTML (no JS) | 1 |
1228/// | Light JS (form validation, simple DOM) | 2-3 |
1229/// | Moderate JS (API calls, dynamic content) | 5 (default) |
1230/// | Heavy SPA (React, Vue, Angular) | 5-10 |
1231/// | Complex visualizations (D3, charts, maps) | 10-15 |
1232/// | Pages loading external resources | 10-20 |
1233///
1234/// # Performance Optimization
1235///
1236/// For high-throughput scenarios, implementing `window.isPageDone` on your
1237/// pages can significantly improve performance:
1238///
1239/// ```text
1240/// Without flag (5s default wait):
1241/// Request 1: ████████████████████ 5.2s
1242/// Request 2: ████████████████████ 5.1s
1243/// Request 3: ████████████████████ 5.3s
1244/// Average: 5.2s per PDF
1245///
1246/// With flag (page ready in 800ms):
1247/// Request 1: ████ 0.9s
1248/// Request 2: ████ 0.8s
1249/// Request 3: ████ 0.9s
1250/// Average: 0.87s per PDF (6x faster!)
1251/// ```
1252///
1253/// # Arguments
1254///
1255/// * `tab` - The browser tab to check. Must have completed navigation.
1256/// * `max_wait` - Maximum time to wait before proceeding with PDF generation.
1257/// This is the upper bound; the function may return earlier if the page
1258/// signals readiness.
1259///
1260/// # Returns
1261///
1262/// This function returns `()` (unit). It either:
1263/// - Returns early when `window.isPageDone === true` is detected
1264/// - Returns after `max_wait` duration has elapsed (timeout)
1265///
1266/// In both cases, PDF generation proceeds afterward. This function never fails -
1267/// timeout is a normal completion path, not an error.
1268///
1269/// # Thread Blocking
1270///
1271/// This function blocks the calling thread with `std::thread::sleep()`.
1272/// Always call from within a blocking context (e.g., `spawn_blocking`).
1273///
1274/// # Example
1275///
1276/// ```rust,ignore
1277/// // Navigate to page first
1278/// let page = tab.navigate_to(url)?.wait_until_navigated()?;
1279///
1280/// // Wait up to 10 seconds for JavaScript
1281/// wait_for_page_ready(&tab, Duration::from_secs(10));
1282///
1283/// // Now generate PDF - page is either ready or we've waited long enough
1284/// let pdf_data = page.print_to_pdf(options)?;
1285/// ```
1286fn wait_for_page_ready(tab: &headless_chrome::Tab, max_wait: Duration) {
1287 let start = Instant::now();
1288 let poll_interval = Duration::from_millis(JS_POLL_INTERVAL_MS);
1289
1290 log::trace!(
1291 "Waiting up to {:?} for page to be ready (polling every {:?})",
1292 max_wait,
1293 poll_interval
1294 );
1295
1296 while start.elapsed() < max_wait {
1297 // Check if page signals completion
1298 let is_done = tab
1299 .evaluate("window.isPageDone === true", false)
1300 .map(|result| result.value.and_then(|v| v.as_bool()).unwrap_or(false))
1301 .unwrap_or(false);
1302
1303 if is_done {
1304 log::debug!("Page signaled ready after {:?}", start.elapsed());
1305 return;
1306 }
1307
1308 // Sleep before next poll
1309 std::thread::sleep(poll_interval);
1310 }
1311
1312 log::debug!(
1313 "Page wait completed after {:?} (timeout, proceeding anyway)",
1314 start.elapsed()
1315 );
1316}
1317
1318/// Safely close a browser tab, ignoring errors.
1319///
1320/// Tab cleanup is best-effort. If it fails, we log a warning but don't
1321/// propagate the error since the PDF generation already succeeded.
1322///
1323/// # Why Best-Effort?
1324///
1325/// - The PDF data is already captured
1326/// - Tab resources will be cleaned up when the browser is recycled
1327/// - Failing here would discard a valid PDF
1328/// - Some errors (e.g., browser already closed) are expected
1329///
1330/// # Arguments
1331///
1332/// * `tab` - The browser tab to close
1333fn close_tab_safely(tab: &headless_chrome::Tab) {
1334 log::trace!("Closing browser tab");
1335
1336 if let Err(e) = tab.close(true) {
1337 // Log but don't fail - PDF generation already succeeded
1338 log::warn!(
1339 "Failed to close tab (continuing anyway, resources will be cleaned up): {}",
1340 e
1341 );
1342 } else {
1343 log::trace!("Tab closed successfully");
1344 }
1345}
1346
/// Truncate a URL for logging purposes.
///
/// Data URLs can be extremely long (containing entire HTML documents).
/// This function shortens them so log lines stay readable.
///
/// # Arguments
///
/// * `url` - The URL to truncate
/// * `max_len` - Maximum number of bytes of `url` to keep
///
/// # Returns
///
/// `url` unchanged when it is at most `max_len` bytes long. Otherwise the
/// longest prefix of at most `max_len` bytes that ends on a UTF-8 character
/// boundary, followed by `"..."`.
///
/// # Panics
///
/// Never. The cut point is moved back to a character boundary, so a
/// `max_len` that lands inside a multi-byte character cannot cause a slicing
/// panic.
fn truncate_url(url: &str, max_len: usize) -> String {
    if url.len() <= max_len {
        return url.to_string();
    }

    // Walk back to the nearest character boundary. Index 0 is always a
    // boundary, so the loop terminates.
    let mut end = max_len;
    while !url.is_char_boundary(end) {
        end -= 1;
    }

    format!("{}...", &url[..end])
}
1367
1368// ============================================================================
1369// Unit Tests
1370// ============================================================================
1371
#[cfg(test)]
mod tests {
    use super::*;

    // -------------------------------------------------------------------------
    // URL Validation Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_validate_url_valid_https() {
        let result = validate_url("https://example.com");
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), "https://example.com/");
    }

    #[test]
    fn test_validate_url_valid_http() {
        let result = validate_url("http://example.com/path?query=value");
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_url_valid_with_port() {
        let result = validate_url("http://localhost:3000/api");
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_url_empty() {
        let result = validate_url("");
        assert!(matches!(result, Err(PdfServiceError::InvalidUrl(_))));
    }

    #[test]
    fn test_validate_url_whitespace_only() {
        let result = validate_url(" ");
        assert!(matches!(result, Err(PdfServiceError::InvalidUrl(_))));
    }

    #[test]
    fn test_validate_url_no_scheme() {
        let result = validate_url("example.com");
        assert!(matches!(result, Err(PdfServiceError::InvalidUrl(_))));
    }

    #[test]
    fn test_validate_url_relative() {
        let result = validate_url("/path/to/page");
        assert!(matches!(result, Err(PdfServiceError::InvalidUrl(_))));
    }

    #[test]
    fn test_validate_url_data_url() {
        let result = validate_url("data:text/html,<h1>Hello</h1>");
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_url_file_url() {
        let result = validate_url("file:///etc/passwd");
        assert!(matches!(result, Err(PdfServiceError::InvalidUrl(_))));
    }

    // -------------------------------------------------------------------------
    // Helper Function Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_truncate_url_short() {
        let url = "https://example.com";
        assert_eq!(truncate_url(url, 50), url);
    }

    #[test]
    fn test_truncate_url_long() {
        let url = "https://example.com/very/long/path/that/exceeds/the/maximum/length";
        let truncated = truncate_url(url, 30);
        assert_eq!(truncated.len(), 33); // 30 + "..."
        assert!(truncated.ends_with("..."));
    }

    #[test]
    fn test_truncate_url_exact_length() {
        let url = "https://example.com";
        assert_eq!(truncate_url(url, url.len()), url);
    }

    #[test]
    fn test_build_print_options_mapping() {
        // Every argument gets a distinct value so that a field wired to the
        // wrong argument (e.g. swapped margins) fails an assertion below.
        let options = build_print_options(
            Some(true),                             // landscape
            Some(false),                            // display_header_footer
            Some(false),                            // print_background
            Some(1.5),                              // scale
            Some(8.5),                              // paper_width
            Some(11.0),                             // paper_height
            Some(0.1),                              // margin_top
            Some(0.2),                              // margin_bottom
            Some(0.3),                              // margin_left
            Some(0.4),                              // margin_right
            Some("1-5".to_string()),                // page_ranges
            Some("<span>header</span>".to_string()), // header_template
            Some("<span>footer</span>".to_string()), // footer_template
            Some(true),                             // prefer_css_page_size
        )
        .expect("build_print_options always returns Some");

        assert_eq!(options.landscape, Some(true));
        assert_eq!(options.display_header_footer, Some(false));
        assert_eq!(options.print_background, Some(false));
        assert_eq!(options.scale, Some(1.5));
        assert_eq!(options.paper_width, Some(8.5));
        assert_eq!(options.paper_height, Some(11.0));
        assert_eq!(options.margin_top, Some(0.1));
        assert_eq!(options.margin_bottom, Some(0.2));
        assert_eq!(options.margin_left, Some(0.3));
        assert_eq!(options.margin_right, Some(0.4));
        assert_eq!(options.page_ranges, Some("1-5".to_string()));
        assert_eq!(
            options.header_template,
            Some("<span>header</span>".to_string())
        );
        assert_eq!(
            options.footer_template,
            Some("<span>footer</span>".to_string())
        );
        assert_eq!(options.prefer_css_page_size, Some(true));
    }

    // -------------------------------------------------------------------------
    // Constants Tests
    // -------------------------------------------------------------------------

    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn test_default_timeout_reasonable() {
        // Timeout should be at least 30 seconds for complex pages
        assert!(DEFAULT_TIMEOUT_SECS >= 30);
        // But not more than 5 minutes (would be too long)
        assert!(DEFAULT_TIMEOUT_SECS <= 300);
    }

    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn test_default_wait_reasonable() {
        // Wait should be at least 1 second for any JS
        assert!(DEFAULT_WAIT_SECS >= 1);
        // But not more than 30 seconds by default
        assert!(DEFAULT_WAIT_SECS <= 30);
    }

    #[test]
    #[allow(clippy::assertions_on_constants)]
    fn test_poll_interval_reasonable() {
        // Poll interval should be at least 100ms (not too aggressive)
        assert!(JS_POLL_INTERVAL_MS >= 100);
        // But not more than 1 second (responsive enough)
        assert!(JS_POLL_INTERVAL_MS <= 1000);
    }
}