html2pdf_api/service/
pdf.rs

1//! Core PDF generation service (framework-agnostic).
2//!
3//! This module contains the core PDF generation logic that is shared across
4//! all web framework integrations. The functions here are **synchronous/blocking**
5//! and should be called from within a blocking context (e.g., `tokio::task::spawn_blocking`,
6//! `actix_web::web::block`, etc.).
7//!
8//! # Architecture
9//!
10//! ```text
11//! ┌─────────────────────────────────────────────────────────────────┐
12//! │                    Framework Integration                        │
13//! │              (Actix-web / Rocket / Axum)                        │
14//! └─────────────────────────┬───────────────────────────────────────┘
15//!                           │ async context
16//!                           ▼
17//! ┌─────────────────────────────────────────────────────────────────┐
18//! │              spawn_blocking / web::block                        │
19//! └─────────────────────────┬───────────────────────────────────────┘
20//!                           │ blocking context
21//!                           ▼
22//! ┌─────────────────────────────────────────────────────────────────┐
23//! │                  This Module (pdf.rs)                           │
24//! │  ┌─────────────────┐  ┌─────────────────┐  ┌─────────────────┐  │
25//! │  │generate_pdf_    │  │generate_pdf_    │  │get_pool_stats   │  │
26//! │  │from_url         │  │from_html        │  │                 │  │
27//! │  └────────┬────────┘  └────────┬────────┘  └─────────────────┘  │
28//! │           │                    │                                │
29//! │           └──────────┬─────────┘                                │
30//! │                      ▼                                          │
31//! │           ┌─────────────────────┐                               │
32//! │           │generate_pdf_internal│                               │
33//! │           └──────────┬──────────┘                               │
34//! └──────────────────────┼──────────────────────────────────────────┘
35//!                        │
36//!                        ▼
37//! ┌─────────────────────────────────────────────────────────────────┐
38//! │                    BrowserPool                                  │
39//! │                 (headless_chrome)                               │
40//! └─────────────────────────────────────────────────────────────────┘
41//! ```
42//!
43//! # Thread Safety
44//!
//! All functions in this module are designed to be called from multiple threads
//! concurrently. The browser pool synchronizes access with its own internal
//! locks (callers do not wrap it in a `Mutex`), and each PDF generation
//! operation acquires a browser, uses it, and returns it to the pool
//! automatically via RAII.
49//!
50//! # Blocking Behavior
51//!
52//! **Important:** These functions block the calling thread. In an async context,
53//! always wrap calls in a blocking task:
54//!
55//! ```rust,ignore
56//! // ✅ Correct: Using spawn_blocking
57//! let result = tokio::task::spawn_blocking(move || {
58//!     generate_pdf_from_url(&pool, &request)
59//! }).await?;
60//!
61//! // ❌ Wrong: Calling directly in async context
62//! // This will block the async runtime!
63//! let result = generate_pdf_from_url(&pool, &request);
64//! ```
65//!
66//! # Usage Examples
67//!
68//! ## Basic URL to PDF Conversion
69//!
70//! ```rust,ignore
71//! use html2pdf_api::service::{generate_pdf_from_url, PdfFromUrlRequest};
72//!
73//! // Assuming `pool` is a BrowserPool
74//! let request = PdfFromUrlRequest {
75//!     url: "https://example.com".to_string(),
76//!     ..Default::default()
77//! };
78//!
79//! // In a blocking context:
80//! let response = generate_pdf_from_url(&pool, &request)?;
81//! println!("Generated PDF: {} bytes", response.data.len());
82//! ```
83//!
84//! ## HTML to PDF Conversion
85//!
86//! ```rust,ignore
87//! use html2pdf_api::service::{generate_pdf_from_html, PdfFromHtmlRequest};
88//!
89//! let request = PdfFromHtmlRequest {
90//!     html: "<html><body><h1>Hello World</h1></body></html>".to_string(),
91//!     filename: Some("hello.pdf".to_string()),
92//!     ..Default::default()
93//! };
94//!
95//! let response = generate_pdf_from_html(&pool, &request)?;
96//! std::fs::write("hello.pdf", &response.data)?;
97//! ```
98//!
99//! ## With Async Web Framework
100//!
101//! ```rust,ignore
102//! use actix_web::{web, HttpResponse};
103//! use html2pdf_api::service::{generate_pdf_from_url, PdfFromUrlRequest};
104//!
105//! async fn handler(
106//!     pool: web::Data<SharedPool>,
107//!     query: web::Query<PdfFromUrlRequest>,
108//! ) -> HttpResponse {
109//!     let pool = pool.into_inner();
110//!     let request = query.into_inner();
111//!
112//!     let result = web::block(move || {
113//!         generate_pdf_from_url(&pool, &request)
114//!     }).await;
115//!
116//!     match result {
117//!         Ok(Ok(pdf)) => HttpResponse::Ok()
118//!             .content_type("application/pdf")
119//!             .body(pdf.data),
120//!         Ok(Err(e)) => HttpResponse::BadRequest().body(e.to_string()),
121//!         Err(e) => HttpResponse::InternalServerError().body(e.to_string()),
122//!     }
123//! }
124//! ```
125//!
126//! # Performance Considerations
127//!
128//! | Operation | Typical Duration | Notes |
129//! |-----------|------------------|-------|
130//! | Pool lock acquisition | < 1ms | Fast, non-blocking |
131//! | Browser checkout | < 1ms | If browser available |
132//! | Browser creation | 500ms - 2s | If pool needs to create new browser |
133//! | Page navigation | 100ms - 10s | Depends on target page |
134//! | JavaScript wait | 0 - 15s | Configurable via `waitsecs` |
135//! | PDF generation | 100ms - 5s | Depends on page complexity |
136//! | Tab cleanup | < 100ms | Best effort, non-blocking |
137//!
138//! # Error Handling
139//!
140//! All functions return `Result<T, PdfServiceError>`. Errors are categorized
141//! and include appropriate HTTP status codes. See [`PdfServiceError`] for
142//! the complete error taxonomy.
143//!
144//! [`PdfServiceError`]: crate::service::PdfServiceError
145
146use headless_chrome::types::PrintToPdfOptions;
147use std::time::{Duration, Instant};
148
149use crate::handle::BrowserHandle;
150use crate::pool::BrowserPool;
151use crate::service::types::*;
152
153// ============================================================================
154// Constants
155// ============================================================================
156
/// Default timeout for the entire PDF generation operation, in seconds.
///
/// The budget is meant to cover the complete operation:
/// - Browser acquisition from pool
/// - Page navigation
/// - JavaScript execution wait
/// - PDF rendering
/// - Tab cleanup
///
/// If the operation exceeds this duration, a [`PdfServiceError::Timeout`]
/// error is returned.
///
/// NOTE(review): `generate_pdf_from_url` and `generate_pdf_from_html` do not
/// read this constant directly; enforcement presumably lives in the timeout
/// wrappers of the framework integrations — confirm.
///
/// # Default Value
///
/// `60` seconds - sufficient for most web pages, including those with
/// heavy JavaScript and external resources.
///
/// # Customization
///
/// This constant is used by framework integrations for their timeout wrappers.
/// To use a different limit, wrap the service functions in your own timeout:
///
/// ```rust,ignore
/// use std::time::Duration;
/// use tokio::time::timeout;
///
/// let custom_timeout = Duration::from_secs(120); // 2 minutes
///
/// let result = timeout(custom_timeout, async {
///     tokio::task::spawn_blocking(move || {
///         generate_pdf_from_url(&pool, &request)
///     }).await
/// }).await;
/// ```
pub const DEFAULT_TIMEOUT_SECS: u64 = 60;
192
/// Default wait time for JavaScript execution, in seconds.
///
/// After page navigation completes, the service waits for JavaScript to finish
/// rendering dynamic content. This constant defines the wait time used when
/// the request does not specify one.
///
/// # Behavior
///
/// During the wait period, the service polls every 200ms (see
/// `JS_POLL_INTERVAL_MS`) for `window.isPageDone === true`. If the page sets
/// this flag, PDF generation proceeds immediately. Otherwise, the full wait
/// duration elapses before the PDF is generated.
///
/// # Default Value
///
/// `5` seconds - balances between allowing time for JavaScript execution
/// and not waiting unnecessarily for simple pages.
///
/// # Recommendations
///
/// | Page Type | Recommended Wait |
/// |-----------|------------------|
/// | Static HTML | 1-2 seconds |
/// | Light JavaScript (vanilla JS, jQuery) | 3-5 seconds |
/// | Heavy SPA (React, Vue, Angular) | 5-10 seconds |
/// | Complex visualizations (D3, charts) | 10-15 seconds |
/// | Real-time data loading | 10-20 seconds |
pub const DEFAULT_WAIT_SECS: u64 = 5;
220
/// Polling interval for the JavaScript completion check, in milliseconds.
///
/// While waiting for JavaScript to complete, the service checks for
/// `window.isPageDone === true` at this interval. The constant is private to
/// this module and is not configurable per request.
///
/// # Trade-offs
///
/// - **Shorter interval**: More responsive but higher CPU usage
/// - **Longer interval**: Lower CPU usage but may overshoot ready state
///
/// # Value
///
/// `200` milliseconds - provides good responsiveness without excessive polling.
const JS_POLL_INTERVAL_MS: u64 = 200;
235
236// ============================================================================
237// Public API - Core PDF Generation Functions
238// ============================================================================
239
240/// Generate a PDF from a URL.
241///
242/// Navigates to the specified URL using a browser from the pool, waits for
243/// JavaScript execution, and generates a PDF of the rendered page.
244///
245/// # Thread Safety
246///
/// This function is thread-safe and can be called concurrently from multiple
/// threads. The browser pool's internal locking ensures safe access to shared resources.
249///
250/// # Blocking Behavior
251///
252/// **This function blocks the calling thread.** In async contexts, wrap it
253/// in `tokio::task::spawn_blocking`, `actix_web::web::block`, or similar.
254///
255/// # Arguments
256///
/// * `pool` - Reference to the browser pool. The pool uses fine-grained internal locks;
///   browser checkout is fast (~1ms) and concurrent.
258/// * `request` - PDF generation parameters. See [`PdfFromUrlRequest`] for details.
259///
260/// # Returns
261///
262/// * `Ok(PdfResponse)` - Successfully generated PDF with binary data and metadata
263/// * `Err(PdfServiceError)` - Error with details about what went wrong
264///
265/// # Errors
266///
267/// | Error | Cause | Resolution |
268/// |-------|-------|------------|
269/// | [`InvalidUrl`] | URL is empty or malformed | Provide valid HTTP/HTTPS URL |
270/// | [`BrowserUnavailable`] | Pool exhausted | Retry or increase pool size |
271/// | [`TabCreationFailed`] | Browser issue | Automatic recovery |
272/// | [`NavigationFailed`] | URL unreachable | Check URL accessibility |
273/// | [`NavigationTimeout`] | Page too slow | Increase timeout or optimize page |
274/// | [`PdfGenerationFailed`] | Rendering issue | Simplify page or check content |
275///
276/// [`InvalidUrl`]: PdfServiceError::InvalidUrl
277/// [`BrowserUnavailable`]: PdfServiceError::BrowserUnavailable
278/// [`TabCreationFailed`]: PdfServiceError::TabCreationFailed
279/// [`NavigationFailed`]: PdfServiceError::NavigationFailed
280/// [`NavigationTimeout`]: PdfServiceError::NavigationTimeout
281/// [`PdfGenerationFailed`]: PdfServiceError::PdfGenerationFailed
282///
283/// # Examples
284///
285/// ## Basic Usage
286///
287/// ```rust,ignore
288/// use html2pdf_api::service::{generate_pdf_from_url, PdfFromUrlRequest};
289///
290/// let request = PdfFromUrlRequest {
291///     url: "https://example.com".to_string(),
292///     ..Default::default()
293/// };
294///
295/// let response = generate_pdf_from_url(&pool, &request)?;
296/// assert!(response.data.starts_with(b"%PDF-")); // Valid PDF header
297/// ```
298///
299/// ## With Custom Options
300///
301/// ```rust,ignore
302/// let request = PdfFromUrlRequest {
303///     url: "https://example.com/report".to_string(),
304///     filename: Some("quarterly-report.pdf".to_string()),
305///     landscape: Some(true),      // Wide tables
306///     waitsecs: Some(10),         // Complex charts
307///     download: Some(true),       // Force download
308///     print_background: Some(true),
309/// };
310///
311/// let response = generate_pdf_from_url(&pool, &request)?;
312/// println!("Generated {} with {} bytes", response.filename, response.size());
313/// ```
314///
315/// ## Error Handling
316///
317/// ```rust,ignore
318/// match generate_pdf_from_url(&pool, &request) {
319///     Ok(pdf) => {
320///         // Success - use pdf.data
321///     }
322///     Err(PdfServiceError::InvalidUrl(msg)) => {
323///         // Client error - return 400
324///         eprintln!("Bad URL: {}", msg);
325///     }
326///     Err(PdfServiceError::BrowserUnavailable(_)) => {
327///         // Transient error - retry
328///         std::thread::sleep(Duration::from_secs(1));
329///     }
330///     Err(e) => {
331///         // Other error
332///         eprintln!("PDF generation failed: {}", e);
333///     }
334/// }
335/// ```
336///
337/// # Performance
338///
339/// Typical execution time breakdown for a moderately complex page:
340///
341/// ```text
342/// ┌────────────────────────────────────────────────────────────────┐
343/// │ Browser checkout                                       ~1ms    │
344/// │ ├─────────────────────────────────────────────────────────────┤
345/// │ Tab creation                                          ~50ms   │
346/// │ ├─────────────────────────────────────────────────────────────┤
347/// │ Navigation + page load                                ~500ms  │
348/// │ ├─────────────────────────────────────────────────────────────┤
349/// │ JavaScript wait (configurable)                        ~5000ms │
350/// │ ├─────────────────────────────────────────────────────────────┤
351/// │ PDF rendering                                         ~200ms  │
352/// │ ├─────────────────────────────────────────────────────────────┤
353/// │ Tab cleanup                                           ~50ms   │
354/// └────────────────────────────────────────────────────────────────┘
355/// Total: ~5.8 seconds (dominated by JS wait)
356/// ```
357pub fn generate_pdf_from_url(
358    pool: &BrowserPool,
359    request: &PdfFromUrlRequest,
360) -> Result<PdfResponse, PdfServiceError> {
361    // Validate URL before acquiring browser
362    let url = validate_url(&request.url)?;
363
364    log::debug!(
365        "Generating PDF from URL: {} (landscape={}, wait={}s)",
366        url,
367        request.is_landscape(),
368        request.wait_duration().as_secs()
369    );
370
371    // Acquire browser from pool (lock held briefly)
372    let browser = acquire_browser(pool)?;
373
374    let print_options = build_print_options(
375        request.landscape,
376        request.display_header_footer,
377        request.print_background,
378        request.scale,
379        request.paper_width,
380        request.paper_height,
381        request.margin_top,
382        request.margin_bottom,
383        request.margin_left,
384        request.margin_right,
385        request.page_ranges.clone(),
386        request.header_template.clone(),
387        request.footer_template.clone(),
388        request.prefer_css_page_size,
389    );
390
391    // Generate PDF (lock released, browser returned via RAII on completion/error)
392    let pdf_data = generate_pdf_internal(
393        &browser,
394        &url,
395        request.wait_duration(),
396        print_options,
397        false, // offline_mode is disabled for URLs
398    )?;
399
400    log::info!(
401        "✅ PDF generated successfully from URL: {} ({} bytes)",
402        url,
403        pdf_data.len()
404    );
405
406    Ok(PdfResponse::new(
407        pdf_data,
408        request.filename_or_default(),
409        request.is_download(),
410    ))
411}
412
413/// Generate a PDF from HTML content.
414///
415/// Loads the provided HTML content into a browser tab using a data URL,
416/// waits for any JavaScript execution, and generates a PDF.
417///
418/// # Thread Safety
419///
420/// This function is thread-safe and can be called concurrently from multiple
421/// threads. See [`generate_pdf_from_url`] for details.
422///
423/// # Blocking Behavior
424///
425/// **This function blocks the calling thread.** See [`generate_pdf_from_url`]
426/// for guidance on async usage.
427///
428/// # How It Works
429///
430/// The HTML content is converted to a data URL:
431///
432/// ```text
433/// data:text/html;charset=utf-8,<encoded-html-content>
434/// ```
435///
436/// This allows loading HTML directly without a web server. The browser
437/// renders the HTML as if it were loaded from a regular URL.
438///
439/// # Arguments
440///
/// * `pool` - Reference to the browser pool (it synchronizes internally; no outer `Mutex`)
442/// * `request` - HTML content and generation parameters. See [`PdfFromHtmlRequest`].
443///
444/// # Returns
445///
446/// * `Ok(PdfResponse)` - Successfully generated PDF
447/// * `Err(PdfServiceError)` - Error details
448///
449/// # Errors
450///
451/// | Error | Cause | Resolution |
452/// |-------|-------|------------|
453/// | [`EmptyHtml`] | HTML content is empty/whitespace | Provide HTML content |
454/// | [`BrowserUnavailable`] | Pool exhausted | Retry or increase pool size |
455/// | [`NavigationFailed`] | HTML parsing issue | Check HTML validity |
456/// | [`PdfGenerationFailed`] | Rendering issue | Simplify HTML |
457///
458/// [`EmptyHtml`]: PdfServiceError::EmptyHtml
459/// [`BrowserUnavailable`]: PdfServiceError::BrowserUnavailable
460/// [`NavigationFailed`]: PdfServiceError::NavigationFailed
461/// [`PdfGenerationFailed`]: PdfServiceError::PdfGenerationFailed
462///
463/// # Limitations
464///
465/// ## External Resources
466///
467/// Since HTML is loaded via data URL, relative URLs don't work:
468///
469/// ```html
470/// <!-- ❌ Won't work - relative URL -->
471/// <img src="/images/logo.png">
472///
473/// <!-- ✅ Works - absolute URL -->
474/// <img src="https://example.com/images/logo.png">
475///
476/// <!-- ✅ Works - inline base64 -->
477/// <img src="data:image/png;base64,iVBORw0KGgo...">
478/// ```
479///
480/// ## Size Limits
481///
482/// Data URLs have browser-specific size limits. For very large HTML documents
483/// (> 1MB), consider:
484/// - Hosting the HTML on a temporary server
485/// - Using [`generate_pdf_from_url`] instead
486/// - Splitting into multiple PDFs
487///
488/// # Examples
489///
490/// ## Simple HTML
491///
492/// ```rust,ignore
493/// use html2pdf_api::service::{generate_pdf_from_html, PdfFromHtmlRequest};
494///
495/// let request = PdfFromHtmlRequest {
496///     html: "<h1>Hello World</h1><p>This is a test.</p>".to_string(),
497///     ..Default::default()
498/// };
499///
500/// let response = generate_pdf_from_html(&pool, &request)?;
501/// std::fs::write("output.pdf", &response.data)?;
502/// ```
503///
504/// ## Complete Document with Styling
505///
506/// ```rust,ignore
507/// let html = r#"
508/// <!DOCTYPE html>
509/// <html>
510/// <head>
511///     <meta charset="UTF-8">
512///     <style>
513///         body {
514///             font-family: 'Arial', sans-serif;
515///             margin: 40px;
516///             color: #333;
517///         }
518///         h1 {
519///             color: #0066cc;
520///             border-bottom: 2px solid #0066cc;
521///             padding-bottom: 10px;
522///         }
523///         table {
524///             width: 100%;
525///             border-collapse: collapse;
526///             margin-top: 20px;
527///         }
528///         th, td {
529///             border: 1px solid #ddd;
530///             padding: 12px;
531///             text-align: left;
532///         }
533///         th {
534///             background-color: #f5f5f5;
535///         }
536///     </style>
537/// </head>
538/// <body>
539///     <h1>Monthly Report</h1>
540///     <p>Generated on: 2024-01-15</p>
541///     <table>
542///         <tr><th>Metric</th><th>Value</th></tr>
543///         <tr><td>Revenue</td><td>$50,000</td></tr>
544///         <tr><td>Users</td><td>1,234</td></tr>
545///     </table>
546/// </body>
547/// </html>
548/// "#;
549///
550/// let request = PdfFromHtmlRequest {
551///     html: html.to_string(),
552///     filename: Some("monthly-report.pdf".to_string()),
553///     print_background: Some(true), // Include styled backgrounds
554///     ..Default::default()
555/// };
556///
557/// let response = generate_pdf_from_html(&pool, &request)?;
558/// ```
559///
560/// ## With Embedded Images
561///
562/// ```rust,ignore
563/// // Base64 encode an image
564/// let image_base64 = base64::encode(std::fs::read("logo.png")?);
565///
566/// let html = format!(r#"
567/// <!DOCTYPE html>
568/// <html>
569/// <body>
570///     <img src="data:image/png;base64,{}" alt="Logo">
571///     <h1>Company Report</h1>
572/// </body>
573/// </html>
574/// "#, image_base64);
575///
576/// let request = PdfFromHtmlRequest {
577///     html,
578///     ..Default::default()
579/// };
580///
581/// let response = generate_pdf_from_html(&pool, &request)?;
582/// ```
583pub fn generate_pdf_from_html(
584    pool: &BrowserPool,
585    request: &PdfFromHtmlRequest,
586) -> Result<PdfResponse, PdfServiceError> {
587    // Validate HTML content
588    if request.html.trim().is_empty() {
589        log::warn!("Empty HTML content provided");
590        return Err(PdfServiceError::EmptyHtml);
591    }
592
593    log::debug!(
594        "Generating PDF from HTML ({} bytes, landscape={}, wait={}s)",
595        request.html.len(),
596        request.is_landscape(),
597        request.wait_duration().as_secs()
598    );
599
600    // Acquire browser from pool
601    let browser = acquire_browser(pool)?;
602
603    // Convert HTML to data URL
604    // Using percent-encoding to handle special characters
605    let data_url = format!(
606        "data:text/html;charset=utf-8,{}",
607        urlencoding::encode(&request.html)
608    );
609
610    log::trace!("Data URL length: {} bytes", data_url.len());
611
612    let print_options = build_print_options(
613        request.landscape,
614        request.display_header_footer,
615        request.print_background,
616        request.scale,
617        request.paper_width,
618        request.paper_height,
619        request.margin_top,
620        request.margin_bottom,
621        request.margin_left,
622        request.margin_right,
623        request.page_ranges.clone(),
624        request.header_template.clone(),
625        request.footer_template.clone(),
626        request.prefer_css_page_size,
627    );
628
629    // Generate PDF
630    let pdf_data = generate_pdf_internal(
631        &browser,
632        &data_url,
633        request.wait_duration(),
634        print_options,
635        request.offline_mode.unwrap_or(false),
636    )?;
637
638    log::info!(
639        "✅ PDF generated successfully from HTML ({} bytes input → {} bytes output)",
640        request.html.len(),
641        pdf_data.len()
642    );
643
644    Ok(PdfResponse::new(
645        pdf_data,
646        request.filename_or_default(),
647        request.is_download(),
648    ))
649}
650
651/// Get current browser pool statistics.
652///
653/// Returns real-time metrics about the browser pool state including
654/// available browsers, active browsers, and total count.
655///
656/// # Thread Safety
657///
658/// This function briefly acquires the pool lock to read statistics.
659/// It's safe to call frequently for monitoring purposes.
660///
661/// # Blocking Behavior
662///
663/// This function is fast (< 1ms) as it reads from the pool's internal
664/// state. Safe to call frequently from health check endpoints.
665///
666/// # Arguments
667///
668/// * `pool` - Reference to the browser pool
669///
670/// # Returns
671///
672/// * `Ok(PoolStatsResponse)` - Current pool statistics
673///
674/// # Examples
675///
676/// ## Basic Usage
677///
678/// ```rust,ignore
679/// use html2pdf_api::service::get_pool_stats;
680///
681/// let stats = get_pool_stats(&pool)?;
682/// println!("Available: {}", stats.available);
683/// println!("Active: {}", stats.active);
684/// println!("Total: {}", stats.total);
685/// ```
686///
687/// ## Monitoring Integration
688///
689/// ```rust,ignore
690/// use prometheus::{Gauge, register_gauge};
691///
692/// lazy_static! {
693///     static ref POOL_AVAILABLE: Gauge = register_gauge!(
694///         "browser_pool_available",
695///         "Number of available browsers in pool"
696///     ).unwrap();
697///     static ref POOL_ACTIVE: Gauge = register_gauge!(
698///         "browser_pool_active",
699///         "Number of active browsers in pool"
700///     ).unwrap();
701/// }
702///
/// fn update_metrics(pool: &BrowserPool) {
704///     if let Ok(stats) = get_pool_stats(pool) {
705///         POOL_AVAILABLE.set(stats.available as f64);
706///         POOL_ACTIVE.set(stats.active as f64);
707///     }
708/// }
709/// ```
710///
711/// ## Capacity Check
712///
713/// ```rust,ignore
714/// let stats = get_pool_stats(&pool)?;
715///
716/// if stats.available == 0 {
717///     log::warn!("No browsers available, requests may be delayed");
718/// }
719///
720/// let utilization = stats.active as f64 / stats.total.max(1) as f64;
721/// if utilization > 0.8 {
722///     log::warn!("Pool utilization at {:.0}%, consider scaling", utilization * 100.0);
723/// }
724/// ```
725pub fn get_pool_stats(pool: &BrowserPool) -> Result<PoolStatsResponse, PdfServiceError> {
726    let stats = pool.stats();
727
728    Ok(PoolStatsResponse {
729        available: stats.available,
730        active: stats.active,
731        total: stats.total,
732    })
733}
734
735/// Check if the browser pool is ready to handle requests.
736///
737/// Returns `true` if the pool has available browsers or capacity to create
738/// new ones. This is useful for readiness probes in container orchestration.
739///
740/// # Readiness Criteria
741///
742/// The pool is considered "ready" if either:
743/// - There are idle browsers available (`available > 0`), OR
744/// - There is capacity to create new browsers (`active < max_pool_size`)
745///
746/// The pool is "not ready" only when:
747/// - All browsers are in use AND the pool is at maximum capacity
748///
749/// # Arguments
750///
751/// * `pool` - Reference to the browser pool
752///
753/// # Returns
754///
755/// * `Ok(true)` - Pool can accept new requests
756/// * `Ok(false)` - Pool is at capacity, requests will queue
757///
758/// # Use Cases
759///
760/// ## Kubernetes Readiness Probe
761///
762/// ```yaml
763/// readinessProbe:
764///   httpGet:
765///     path: /ready
766///     port: 8080
767///   initialDelaySeconds: 5
768///   periodSeconds: 10
769/// ```
770///
771/// ## Load Balancer Health Check
772///
773/// When `is_pool_ready` returns `false`, the endpoint should return
774/// HTTP 503 Service Unavailable to remove the instance from rotation.
775///
776/// # Examples
777///
778/// ## Basic Check
779///
780/// ```rust,ignore
781/// use html2pdf_api::service::is_pool_ready;
782///
783/// if is_pool_ready(&pool)? {
784///     println!("Pool is ready to accept requests");
785/// } else {
786///     println!("Pool is at capacity");
787/// }
788/// ```
789///
790/// ## Request Gating
791///
792/// ```rust,ignore
/// // Blocking: call from a blocking context (e.g. inside `spawn_blocking`).
/// fn handle_request(pool: &BrowserPool, request: PdfFromUrlRequest) -> Result<PdfResponse, Error> {
///     // Quick capacity check before expensive operation
795///     if !is_pool_ready(pool)? {
796///         return Err(Error::ServiceUnavailable("Pool at capacity, try again later"));
797///     }
798///     
799///     // Proceed with PDF generation
800///     generate_pdf_from_url(pool, &request)
801/// }
802/// ```
803pub fn is_pool_ready(pool: &BrowserPool) -> Result<bool, PdfServiceError> {
804    let stats = pool.stats();
805    let config = pool.config();
806
807    // Ready if we have available browsers OR we can create more
808    let is_ready = stats.available > 0 || stats.active < config.max_pool_size;
809
810    log::trace!(
811        "Pool readiness check: available={}, active={}, max={}, ready={}",
812        stats.available,
813        stats.active,
814        config.max_pool_size,
815        is_ready
816    );
817
818    Ok(is_ready)
819}
820
821// ============================================================================
822// Internal Helper Functions
823// ============================================================================
824
825/// Validate and normalize a URL string.
826///
827/// Parses the URL using the `url` crate and returns the normalized form.
828/// This catches malformed URLs early, before acquiring a browser.
829///
830/// # Validation Rules
831///
832/// - URL must not be empty
833/// - URL must be parseable by the `url` crate
/// - Scheme must be `http`, `https`, or `data` (any other scheme, e.g. `file`, is rejected)
835///
836/// # Arguments
837///
838/// * `url` - The URL string to validate
839///
840/// # Returns
841///
842/// * `Ok(String)` - The normalized URL
843/// * `Err(PdfServiceError::InvalidUrl)` - If validation fails
844///
845/// # Examples
846///
847/// ```rust,ignore
848/// assert!(validate_url("https://example.com").is_ok());
849/// assert!(validate_url("").is_err());
850/// assert!(validate_url("not-a-url").is_err());
851/// ```
852fn validate_url(url: &str) -> Result<String, PdfServiceError> {
853    // Check for empty URL first (better error message)
854    if url.trim().is_empty() {
855        log::debug!("URL validation failed: empty URL");
856        return Err(PdfServiceError::InvalidUrl("URL is required".to_string()));
857    }
858
859    // Parse and normalize the URL
860    match url::Url::parse(url) {
861        Ok(parsed) => {
862            let scheme = parsed.scheme();
863            if scheme != "http" && scheme != "https" && scheme != "data" {
864                log::debug!("URL validation failed: unsupported scheme '{}'", scheme);
865                return Err(PdfServiceError::InvalidUrl(format!(
866                    "Unsupported URL scheme '{}'. Only http, https, and data are allowed",
867                    scheme
868                )));
869            }
870            log::trace!("URL validated successfully: {}", parsed);
871            Ok(parsed.to_string())
872        }
873        Err(e) => {
874            log::debug!("URL validation failed for '{}': {}", url, e);
875            Err(PdfServiceError::InvalidUrl(e.to_string()))
876        }
877    }
878}
879
880/// Acquire a browser from the pool.
881///
/// Retrieves a browser via `pool.get()` and returns it. No outer lock is taken
/// here — the pool uses its own internal locks.
884///
885/// # Browser Lifecycle
886///
887/// The returned `BrowserHandle` uses RAII to automatically return the
888/// browser to the pool when dropped:
889///
890/// ```text
891/// ┌─────────────────┐     ┌─────────────────┐     ┌─────────────────┐
892/// │  acquire_browser │ ──▶ │  BrowserHandle  │ ──▶ │  PDF Generation │
893/// │  (lock, get)     │     │  (RAII guard)   │     │  (uses browser) │
894/// └─────────────────┘     └─────────────────┘     └────────┬────────┘
895///                                                          │
896///                                                          ▼
897///                         ┌─────────────────┐     ┌─────────────────┐
898///                         │  Back to Pool   │ ◀── │  Drop Handle    │
899///                         │  (automatic)    │     │  (RAII cleanup) │
900///                         └─────────────────┘     └─────────────────┘
901/// ```
902///
903/// # Arguments
904///
905/// * `pool` - Reference to the mutex-wrapped browser pool
906///
907/// # Returns
908///
909/// * `Ok(BrowserHandle)` - A browser ready for use
910/// * `Err(PdfServiceError)` - If pool lock or browser acquisition fails
911fn acquire_browser(pool: &BrowserPool) -> Result<BrowserHandle, PdfServiceError> {
912    // Get a browser from the pool (no outer lock needed — pool uses internal locks)
913    let browser = pool.get().map_err(|e| {
914        log::error!("❌ Failed to get browser from pool: {}", e);
915        PdfServiceError::BrowserUnavailable(e.to_string())
916    })?;
917
918    log::debug!("Acquired browser {} from pool", browser.id());
919
920    Ok(browser)
921}
922
923/// Core PDF generation logic.
924///
925/// This function performs the actual work of:
926/// 1. Creating a new browser tab
927/// 2. Navigating to the URL
928/// 3. Waiting for JavaScript completion
929/// 4. Generating the PDF
930/// 5. Cleaning up the tab
931///
932/// # Arguments
933///
934/// * `browser` - Browser handle from the pool
935/// * `url` - URL to navigate to (can be http/https or data: URL)
936/// * `wait_duration` - How long to wait for JavaScript
937/// * `landscape` - Whether to use landscape orientation
938/// * `print_background` - Whether to include background graphics
939///
940/// # Returns
941///
942/// * `Ok(Vec<u8>)` - The raw PDF binary data
943/// * `Err(PdfServiceError)` - If any step fails
944///
945/// # Tab Lifecycle
946///
947/// A new tab is created for each PDF generation and closed afterward.
948/// This ensures clean state and prevents memory leaks from accumulating
949/// page resources.
950///
951/// ```text
952/// Browser Instance
953/// ├── Tab 1 (new) ◀── Created for this request
954/// │   ├── Navigate to URL
955/// │   ├── Wait for JS
956/// │   ├── Generate PDF
957/// │   └── Close tab ◀── Cleanup
958/// └── (available for next request)
959/// ```
960fn generate_pdf_internal(
961    browser: &BrowserHandle,
962    url: &str,
963    wait_duration: Duration,
964    print_options: Option<PrintToPdfOptions>,
965    offline_mode: bool,
966) -> Result<Vec<u8>, PdfServiceError> {
967    let start_time = Instant::now();
968
969    // Create new tab
970    log::trace!("Creating new browser tab");
971    let tab = browser.new_tab().map_err(|e| {
972        log::error!("❌ Failed to create tab: {}", e);
973        browser.mark_unhealthy(); // Poison pill prevention
974        PdfServiceError::TabCreationFailed(e.to_string())
975    })?;
976
977    if offline_mode {
978        log::info!("🛡️ Enabling CDP Offline Mode (SSRF Defense)");
979        let _ = tab
980            .call_method(
981                headless_chrome::protocol::cdp::Network::EmulateNetworkConditions {
982                    offline: true,
983                    latency: 0.0,
984                    download_throughput: 0.0,
985                    upload_throughput: 0.0,
986                    connection_Type: None,
987                    packet_loss: None,
988                    packet_queue_length: None,
989                    packet_reordering: None,
990                },
991            )
992            .map_err(|e| log::warn!("Failed to apply offline mode: {}", e));
993    }
994
995    // Navigate to URL
996    log::trace!("Navigating to URL: {}", truncate_url(url, 100));
997    let nav_start = Instant::now();
998
999    let page = tab
1000        .navigate_to(url)
1001        .map_err(|e| {
1002            log::error!("❌ Failed to navigate to URL: {}", e);
1003            browser.mark_unhealthy();
1004            PdfServiceError::NavigationFailed(e.to_string())
1005        })?
1006        .wait_until_navigated()
1007        .map_err(|e| {
1008            log::error!("❌ Navigation timeout: {}", e);
1009            browser.mark_unhealthy();
1010            PdfServiceError::NavigationTimeout(e.to_string())
1011        })?;
1012
1013    log::debug!("Navigation completed in {:?}", nav_start.elapsed());
1014
1015    // Wait for JavaScript execution
1016    wait_for_page_ready(&tab, wait_duration);
1017
1018    // Generate PDF
1019    log::trace!("Generating PDF");
1020    let pdf_start = Instant::now();
1021
1022    let pdf_data = page.print_to_pdf(print_options).map_err(|e| {
1023        log::error!("❌ Failed to generate PDF: {}", e);
1024        PdfServiceError::PdfGenerationFailed(e.to_string())
1025    })?;
1026
1027    log::debug!(
1028        "PDF generated in {:?} ({} bytes)",
1029        pdf_start.elapsed(),
1030        pdf_data.len()
1031    );
1032
1033    // Close tab (best effort - don't fail if this doesn't work)
1034    close_tab_safely(&tab);
1035
1036    log::debug!("Total PDF generation time: {:?}", start_time.elapsed());
1037
1038    Ok(pdf_data)
1039}
1040
1041/// Build PDF print options.
1042///
1043/// Creates the `PrintToPdfOptions` struct with the specified settings
1044/// and sensible defaults for margins and other options.
1045///
1046/// # Default Settings
1047///
1048/// - **Margins**: All set to 0 (full page)
1049/// - **Header/Footer**: Disabled
1050/// - **Background**: Configurable (default: true)
1051/// - **Scale**: 1.0 (100%)
1052#[allow(clippy::too_many_arguments)]
1053fn build_print_options(
1054    landscape: Option<bool>,
1055    display_header_footer: Option<bool>,
1056    print_background: Option<bool>,
1057    scale: Option<f64>,
1058    paper_width: Option<f64>,
1059    paper_height: Option<f64>,
1060    margin_top: Option<f64>,
1061    margin_bottom: Option<f64>,
1062    margin_left: Option<f64>,
1063    margin_right: Option<f64>,
1064    page_ranges: Option<String>,
1065    header_template: Option<String>,
1066    footer_template: Option<String>,
1067    prefer_css_page_size: Option<bool>,
1068) -> Option<PrintToPdfOptions> {
1069    Some(PrintToPdfOptions {
1070        landscape,
1071        display_header_footer,
1072        print_background,
1073        scale,
1074        paper_width,
1075        paper_height,
1076        margin_top,
1077        margin_bottom,
1078        margin_left,
1079        margin_right,
1080        page_ranges,
1081        header_template,
1082        footer_template,
1083        prefer_css_page_size,
1084        ..Default::default()
1085    })
1086}
1087
1088/// Wait for the page to signal it's ready for PDF generation.
1089///
1090/// This function implements a polling loop that checks for `window.isPageDone === true`.
1091/// This allows JavaScript-heavy pages to signal when they've finished rendering,
1092/// enabling early PDF generation without waiting the full timeout.
1093///
1094/// # Behavior Summary
1095///
1096/// | Page State | Result |
1097/// |------------|--------|
1098/// | `window.isPageDone = true` | Returns **immediately** (early exit) |
1099/// | `window.isPageDone = false` | Waits **full duration** |
1100/// | `window.isPageDone` not defined | Waits **full duration** |
1101/// | JavaScript error during check | Waits **full duration** |
1102///
1103/// # Default Behavior (No Flag Set)
1104///
1105/// **Important:** If the page does not set `window.isPageDone = true`, this function
1106/// waits the **full `max_wait` duration** before returning. This is intentional -
1107/// it gives JavaScript-heavy pages time to render even without explicit signaling.
1108///
1109/// For example, with the default `waitsecs = 5`:
1110/// - A page **with** the flag set immediately: ~0ms wait
1111/// - A page **without** the flag: full 5000ms wait
1112///
1113/// # How It Works
1114///
1115/// ```text
1116/// ┌─────────────────────────────────────────────────────────────────┐
1117/// │                    wait_for_page_ready                          │
1118/// │                                                                 │
1119/// │   ┌─────────┐     ┌──────────────┐     ┌─────────────────────┐  │
1120/// │   │  Start  │────▶│ Check flag   │────▶│ window.isPageDone?  │  │
1121/// │   └─────────┘     └──────────────┘     └──────────┬──────────┘  │
1122/// │                                                   │             │
1123/// │                          ┌────────────────────────┼─────────┐   │
1124/// │                          │                        │         │   │
1125/// │                          ▼                        ▼         │   │
1126/// │                   ┌────────────┐           ┌───────────┐    │   │
1127/// │                   │   true     │           │  false /  │    │   │
1128/// │                   │ (ready!)   │           │ undefined │    │   │
1129/// │                   └─────┬──────┘           └─────┬─────┘    │   │
1130/// │                         │                        │          │   │
1131/// │                         ▼                        ▼          │   │
1132/// │                   ┌───────────┐           ┌───────────┐     │   │
1133/// │                   │  Return   │           │ Sleep     │     │   │
1134/// │                   │  early    │           │ 200ms     │─────┘   │
1135/// │                   └───────────┘           └───────────┘         │
1136/// │                                                  │              │
1137/// │                                                  ▼              │
1138/// │                                           ┌───────────┐         │
1139/// │                                           │ Timeout?  │         │
1140/// │                                           └─────┬─────┘         │
1141/// │                                                 │               │
1142/// │                                    ┌────────────┴────────────┐  │
1143/// │                                    ▼                         ▼  │
1144/// │                             ┌───────────┐              ┌──────┐ │
1145/// │                             │   Yes     │              │  No  │ │
1146/// │                             │ (proceed) │              │(loop)│ │
1147/// │                             └───────────┘              └──────┘ │
1148/// └─────────────────────────────────────────────────────────────────┘
1149/// ```
1150///
1151/// # Polling Timeline
1152///
1153/// The function polls every 200ms (see `JS_POLL_INTERVAL_MS`):
1154///
1155/// ```text
1156/// Time:   0ms    200ms   400ms   600ms   800ms  ...  5000ms
1157///          │       │       │       │       │           │
1158///          ▼       ▼       ▼       ▼       ▼           ▼
1159///        Poll    Poll    Poll    Poll    Poll  ...   Timeout
1160///          │       │       │       │       │           │
1161///          └───────┴───────┴───────┴───────┴───────────┤
1162///                                                      ▼
1163///                                              Proceed to PDF
1164///
1165/// If window.isPageDone = true at any poll → Exit immediately
1166/// ```
1167///
1168/// Each poll executes this JavaScript:
1169///
1170/// ```javascript
/// window.isPageDone === true  // Evaluates to true or false (false when the flag is unset)
1172/// ```
1173///
1174/// - `true` → Function returns immediately
1175/// - `false` / `undefined` / error → Continue polling until timeout
1176///
1177/// # Page-Side Implementation (Optional)
1178///
1179/// To enable early completion and avoid unnecessary waiting, add this to your
1180/// page's JavaScript **after** all content is rendered:
1181///
1182/// ```javascript
1183/// // Signal that the page is ready for PDF generation
1184/// window.isPageDone = true;
1185/// ```
1186///
1187/// ## Framework Examples
1188///
1189/// **React:**
1190/// ```javascript
1191/// useEffect(() => {
1192///     fetchData().then((result) => {
1193///         setData(result);
1194///         // Signal ready after state update and re-render
1195///         setTimeout(() => { window.isPageDone = true; }, 0);
1196///     });
1197/// }, []);
1198/// ```
1199///
1200/// **Vue:**
1201/// ```javascript
1202/// mounted() {
1203///     this.loadData().then(() => {
1204///         this.$nextTick(() => {
1205///             window.isPageDone = true;
1206///         });
1207///     });
1208/// }
1209/// ```
1210///
1211/// **Vanilla JavaScript:**
1212/// ```javascript
1213/// document.addEventListener('DOMContentLoaded', async () => {
1214///     await loadDynamicContent();
1215///     await renderCharts();
1216///     window.isPageDone = true;  // All done!
1217/// });
1218/// ```
1219///
1220/// # When to Increase `waitsecs`
1221///
1222/// If you cannot modify the target page to set `window.isPageDone`, increase
1223/// `waitsecs` based on the page complexity:
1224///
1225/// | Page Type | Recommended `waitsecs` |
1226/// |-----------|------------------------|
1227/// | Static HTML (no JS) | 1 |
1228/// | Light JS (form validation, simple DOM) | 2-3 |
1229/// | Moderate JS (API calls, dynamic content) | 5 (default) |
1230/// | Heavy SPA (React, Vue, Angular) | 5-10 |
1231/// | Complex visualizations (D3, charts, maps) | 10-15 |
1232/// | Pages loading external resources | 10-20 |
1233///
1234/// # Performance Optimization
1235///
1236/// For high-throughput scenarios, implementing `window.isPageDone` on your
1237/// pages can significantly improve performance:
1238///
1239/// ```text
1240/// Without flag (5s default wait):
1241///     Request 1: ████████████████████ 5.2s
1242///     Request 2: ████████████████████ 5.1s
1243///     Request 3: ████████████████████ 5.3s
1244///     Average: 5.2s per PDF
1245///
1246/// With flag (page ready in 800ms):
1247///     Request 1: ████ 0.9s
1248///     Request 2: ████ 0.8s
1249///     Request 3: ████ 0.9s
1250///     Average: 0.87s per PDF (6x faster!)
1251/// ```
1252///
1253/// # Arguments
1254///
1255/// * `tab` - The browser tab to check. Must have completed navigation.
1256/// * `max_wait` - Maximum time to wait before proceeding with PDF generation.
1257///   This is the upper bound; the function may return earlier if the page
1258///   signals readiness.
1259///
1260/// # Returns
1261///
1262/// This function returns `()` (unit). It either:
1263/// - Returns early when `window.isPageDone === true` is detected
1264/// - Returns after `max_wait` duration has elapsed (timeout)
1265///
1266/// In both cases, PDF generation proceeds afterward. This function never fails -
1267/// timeout is a normal completion path, not an error.
1268///
1269/// # Thread Blocking
1270///
1271/// This function blocks the calling thread with `std::thread::sleep()`.
1272/// Always call from within a blocking context (e.g., `spawn_blocking`).
1273///
1274/// # Example
1275///
1276/// ```rust,ignore
1277/// // Navigate to page first
1278/// let page = tab.navigate_to(url)?.wait_until_navigated()?;
1279///
1280/// // Wait up to 10 seconds for JavaScript
1281/// wait_for_page_ready(&tab, Duration::from_secs(10));
1282///
1283/// // Now generate PDF - page is either ready or we've waited long enough
1284/// let pdf_data = page.print_to_pdf(options)?;
1285/// ```
1286fn wait_for_page_ready(tab: &headless_chrome::Tab, max_wait: Duration) {
1287    let start = Instant::now();
1288    let poll_interval = Duration::from_millis(JS_POLL_INTERVAL_MS);
1289
1290    log::trace!(
1291        "Waiting up to {:?} for page to be ready (polling every {:?})",
1292        max_wait,
1293        poll_interval
1294    );
1295
1296    while start.elapsed() < max_wait {
1297        // Check if page signals completion
1298        let is_done = tab
1299            .evaluate("window.isPageDone === true", false)
1300            .map(|result| result.value.and_then(|v| v.as_bool()).unwrap_or(false))
1301            .unwrap_or(false);
1302
1303        if is_done {
1304            log::debug!("Page signaled ready after {:?}", start.elapsed());
1305            return;
1306        }
1307
1308        // Sleep before next poll
1309        std::thread::sleep(poll_interval);
1310    }
1311
1312    log::debug!(
1313        "Page wait completed after {:?} (timeout, proceeding anyway)",
1314        start.elapsed()
1315    );
1316}
1317
1318/// Safely close a browser tab, ignoring errors.
1319///
1320/// Tab cleanup is best-effort. If it fails, we log a warning but don't
1321/// propagate the error since the PDF generation already succeeded.
1322///
1323/// # Why Best-Effort?
1324///
1325/// - The PDF data is already captured
1326/// - Tab resources will be cleaned up when the browser is recycled
1327/// - Failing here would discard a valid PDF
1328/// - Some errors (e.g., browser already closed) are expected
1329///
1330/// # Arguments
1331///
1332/// * `tab` - The browser tab to close
1333fn close_tab_safely(tab: &headless_chrome::Tab) {
1334    log::trace!("Closing browser tab");
1335
1336    if let Err(e) = tab.close(true) {
1337        // Log but don't fail - PDF generation already succeeded
1338        log::warn!(
1339            "Failed to close tab (continuing anyway, resources will be cleaned up): {}",
1340            e
1341        );
1342    } else {
1343        log::trace!("Tab closed successfully");
1344    }
1345}
1346
/// Truncate a URL for logging purposes.
///
/// Data URLs can be extremely long (containing entire HTML documents).
/// This function truncates them for readable log output.
///
/// # Arguments
///
/// * `url` - The URL to truncate
/// * `max_len` - Maximum length **in bytes** before truncation
///
/// # Returns
///
/// The URL unchanged if it is at most `max_len` bytes long. Otherwise a
/// prefix of at most `max_len` bytes followed by `"..."`. The cut point is
/// moved back to the nearest UTF-8 character boundary, so multi-byte input
/// never causes a panic (the prefix may then be slightly shorter than
/// `max_len`).
fn truncate_url(url: &str, max_len: usize) -> String {
    if url.len() <= max_len {
        return url.to_string();
    }

    // Slicing a `str` at a byte offset inside a multi-byte character panics,
    // so back up to the closest boundary at or below `max_len`. Offset 0 is
    // always a boundary, which guarantees the loop terminates.
    let mut cut = max_len;
    while !url.is_char_boundary(cut) {
        cut -= 1;
    }

    format!("{}...", &url[..cut])
}
1367
1368// ============================================================================
1369// Unit Tests
1370// ============================================================================
1371
/// Unit tests for the pure helpers in this module (URL validation, log
/// truncation, print-option mapping) plus sanity bounds on the timing
/// constants. None of these tests start a browser.
#[cfg(test)]
mod tests {
    use super::*;

    // -------------------------------------------------------------------------
    // URL Validation Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_validate_url_valid_https() {
        let result = validate_url("https://example.com");
        assert!(result.is_ok());
        // Parsing normalizes the URL, which adds the root path.
        assert_eq!(result.unwrap(), "https://example.com/");
    }

    #[test]
    fn test_validate_url_valid_http() {
        let result = validate_url("http://example.com/path?query=value");
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_url_valid_with_port() {
        let result = validate_url("http://localhost:3000/api");
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_url_empty() {
        let result = validate_url("");
        assert!(matches!(result, Err(PdfServiceError::InvalidUrl(_))));
    }

    #[test]
    fn test_validate_url_whitespace_only() {
        let result = validate_url("   ");
        assert!(matches!(result, Err(PdfServiceError::InvalidUrl(_))));
    }

    #[test]
    fn test_validate_url_no_scheme() {
        let result = validate_url("example.com");
        assert!(matches!(result, Err(PdfServiceError::InvalidUrl(_))));
    }

    #[test]
    fn test_validate_url_relative() {
        let result = validate_url("/path/to/page");
        assert!(matches!(result, Err(PdfServiceError::InvalidUrl(_))));
    }

    #[test]
    fn test_validate_url_data_url() {
        let result = validate_url("data:text/html,<h1>Hello</h1>");
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_url_file_url() {
        // Only http, https and data schemes are accepted.
        let result = validate_url("file:///etc/passwd");
        assert!(matches!(result, Err(PdfServiceError::InvalidUrl(_))));
    }

    // -------------------------------------------------------------------------
    // Helper Function Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_truncate_url_short() {
        let url = "https://example.com";
        assert_eq!(truncate_url(url, 50), url);
    }

    #[test]
    fn test_truncate_url_long() {
        let url = "https://example.com/very/long/path/that/exceeds/the/maximum/length";
        let truncated = truncate_url(url, 30);
        assert_eq!(truncated.len(), 33); // 30 + "..."
        assert!(truncated.ends_with("..."));
    }

    #[test]
    fn test_truncate_url_exact_length() {
        let url = "https://example.com";
        assert_eq!(truncate_url(url, url.len()), url);
    }

    // -------------------------------------------------------------------------
    // Print Options Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_build_print_options_mapping() {
        // Every argument gets a distinct value so that two swapped fields
        // cannot cancel each other out.
        let options = build_print_options(
            Some(true),                          // landscape
            Some(false),                         // display_header_footer
            Some(false),                         // print_background
            Some(1.5),                           // scale
            Some(8.5),                           // paper_width
            Some(11.0),                          // paper_height
            Some(0.1),                           // margin_top
            Some(0.2),                           // margin_bottom
            Some(0.3),                           // margin_left
            Some(0.4),                           // margin_right
            Some("1-5".to_string()),             // page_ranges
            Some("<span>head</span>".to_string()), // header_template
            Some("<span>foot</span>".to_string()), // footer_template
            Some(true),                          // prefer_css_page_size
        )
        .unwrap();

        assert_eq!(options.landscape, Some(true));
        assert_eq!(options.display_header_footer, Some(false));
        assert_eq!(options.print_background, Some(false));
        assert_eq!(options.scale, Some(1.5));
        assert_eq!(options.paper_width, Some(8.5));
        assert_eq!(options.paper_height, Some(11.0));
        assert_eq!(options.margin_top, Some(0.1));
        assert_eq!(options.margin_bottom, Some(0.2));
        assert_eq!(options.margin_left, Some(0.3));
        assert_eq!(options.margin_right, Some(0.4));
        assert_eq!(options.page_ranges, Some("1-5".to_string()));
        assert_eq!(
            options.header_template,
            Some("<span>head</span>".to_string())
        );
        assert_eq!(
            options.footer_template,
            Some("<span>foot</span>".to_string())
        );
        assert_eq!(options.prefer_css_page_size, Some(true));
    }

    #[test]
    fn test_build_print_options_none_passthrough() {
        // Unset arguments must stay unset rather than being replaced by a
        // default inside build_print_options.
        let options = build_print_options(
            None, None, None, None, None, None, None, None, None, None, None, None, None, None,
        )
        .unwrap();

        assert_eq!(options.landscape, None);
        assert_eq!(options.print_background, None);
        assert_eq!(options.scale, None);
        assert_eq!(options.margin_top, None);
        assert_eq!(options.page_ranges, None);
        assert_eq!(options.header_template, None);
        assert_eq!(options.footer_template, None);
    }

    // -------------------------------------------------------------------------
    // Constants Tests
    // -------------------------------------------------------------------------

    #[test]
    #[allow(clippy::assertions_on_constants)] // the constant itself is under test
    fn test_default_timeout_reasonable() {
        // Timeout should be at least 30 seconds for complex pages
        assert!(DEFAULT_TIMEOUT_SECS >= 30);
        // But not more than 5 minutes (would be too long)
        assert!(DEFAULT_TIMEOUT_SECS <= 300);
    }

    #[test]
    #[allow(clippy::assertions_on_constants)] // the constant itself is under test
    fn test_default_wait_reasonable() {
        // Wait should be at least 1 second for any JS
        assert!(DEFAULT_WAIT_SECS >= 1);
        // But not more than 30 seconds by default
        assert!(DEFAULT_WAIT_SECS <= 30);
    }

    #[test]
    #[allow(clippy::assertions_on_constants)] // the constant itself is under test
    fn test_poll_interval_reasonable() {
        // Poll interval should be at least 100ms (not too aggressive)
        assert!(JS_POLL_INTERVAL_MS >= 100);
        // But not more than 1 second (responsive enough)
        assert!(JS_POLL_INTERVAL_MS <= 1000);
    }
}
html2pdf_api/service/pdf.rs

html2pdf_api/service/
pdf.rs