reasonkit_web/browser/
capture.rs

1//! Page capture functionality
2//!
3//! This module handles screenshots, PDFs, MHTML snapshots, and HTML capture.
4
5use crate::browser::PageHandle;
6use crate::error::{CaptureError, Result};
7use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
8use chromiumoxide::cdp::browser_protocol::page::{
9    CaptureScreenshotFormat, CaptureSnapshotFormat, CaptureSnapshotParams, PrintToPdfParams,
10};
11use chromiumoxide::page::ScreenshotParams;
12use serde::{Deserialize, Serialize};
13use tracing::{debug, info, instrument};
14
/// Output format produced by a capture operation.
///
/// Serialized through serde as the lowercase variant name (for example
/// `"jpeg"`). [`CaptureFormat::Png`] is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum CaptureFormat {
    /// PNG screenshot (the default format)
    #[default]
    Png,
    /// JPEG screenshot
    Jpeg,
    /// WebP screenshot
    Webp,
    /// PDF document
    Pdf,
    /// MHTML archive
    Mhtml,
    /// Raw HTML
    Html,
}
33
/// Options controlling a capture operation.
///
/// Every field may be omitted from serialized input: `format`, `quality`,
/// `full_page` and `as_base64` carry explicit serde defaults, and the
/// `Option` fields are simply absent when not supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CaptureOptions {
    /// Capture format (defaults to PNG)
    #[serde(default)]
    pub format: CaptureFormat,
    /// JPEG/WebP quality (0-100); defaults to 85 when omitted
    #[serde(default = "default_quality")]
    pub quality: u8,
    /// Capture full page (not just viewport); defaults to `true` when omitted
    #[serde(default = "default_true")]
    pub full_page: bool,
    /// Viewport width for capture; `validate` accepts 1 through 16384
    pub width: Option<u32>,
    /// Viewport height for capture; `validate` accepts 1 through 16384
    pub height: Option<u32>,
    /// CSS selector to clip to
    pub clip_selector: Option<String>,
    /// Request a base64 encoding of the captured data in addition to the raw
    /// bytes (defaults to `false`)
    #[serde(default)]
    pub as_base64: bool,
}
56
/// Serde fallback for `CaptureOptions::quality` when the field is omitted.
fn default_quality() -> u8 {
    const DEFAULT_QUALITY: u8 = 85;
    DEFAULT_QUALITY
}
60
/// Serde fallback for boolean options that are enabled unless stated otherwise.
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
64
65impl Default for CaptureOptions {
66    fn default() -> Self {
67        Self {
68            format: CaptureFormat::Png,
69            quality: 85,
70            full_page: true,
71            width: None,
72            height: None,
73            clip_selector: None,
74            as_base64: false,
75        }
76    }
77}
78
79impl CaptureOptions {
80    /// Create options for PNG screenshot
81    pub fn png() -> Self {
82        Self {
83            format: CaptureFormat::Png,
84            ..Default::default()
85        }
86    }
87
88    /// Create options for JPEG screenshot
89    pub fn jpeg(quality: u8) -> Self {
90        Self {
91            format: CaptureFormat::Jpeg,
92            quality,
93            ..Default::default()
94        }
95    }
96
97    /// Create options for PDF
98    pub fn pdf() -> Self {
99        Self {
100            format: CaptureFormat::Pdf,
101            ..Default::default()
102        }
103    }
104
105    /// Create options for MHTML
106    pub fn mhtml() -> Self {
107        Self {
108            format: CaptureFormat::Mhtml,
109            ..Default::default()
110        }
111    }
112
113    /// Create options for HTML
114    pub fn html() -> Self {
115        Self {
116            format: CaptureFormat::Html,
117            ..Default::default()
118        }
119    }
120
121    /// Validate capture request options
122    pub fn validate(&self) -> std::result::Result<(), String> {
123        // Quality must be in valid range
124        if self.quality > 100 {
125            return Err("Quality must be between 0 and 100".to_string());
126        }
127
128        // Width/height constraints
129        if let Some(w) = self.width {
130            if w == 0 || w > 16384 {
131                return Err("Width must be between 1 and 16384".to_string());
132            }
133        }
134        if let Some(h) = self.height {
135            if h == 0 || h > 16384 {
136                return Err("Height must be between 1 and 16384".to_string());
137            }
138        }
139
140        Ok(())
141    }
142}
143
/// Result of a capture operation.
///
/// Carries the raw captured bytes together with the metadata describing them.
#[derive(Debug, Clone)]
pub struct CaptureResult {
    /// The captured data as raw bytes
    pub data: Vec<u8>,
    /// The format of the capture
    pub format: CaptureFormat,
    /// Base64 encoded data (if requested); `None` otherwise
    pub base64: Option<String>,
    /// Width of the capture (for images), when known
    pub width: Option<u32>,
    /// Height of the capture (for images), when known
    pub height: Option<u32>,
    /// Size in bytes
    pub size: usize,
}
160
161impl CaptureResult {
162    /// Get data as base64
163    pub fn to_base64(&self) -> String {
164        BASE64.encode(&self.data)
165    }
166
167    /// Get appropriate MIME type
168    pub fn mime_type(&self) -> &'static str {
169        match self.format {
170            CaptureFormat::Png => "image/png",
171            CaptureFormat::Jpeg => "image/jpeg",
172            CaptureFormat::Webp => "image/webp",
173            CaptureFormat::Pdf => "application/pdf",
174            CaptureFormat::Mhtml => "multipart/related",
175            CaptureFormat::Html => "text/html",
176        }
177    }
178
179    /// Get file extension
180    pub fn extension(&self) -> &'static str {
181        match self.format {
182            CaptureFormat::Png => "png",
183            CaptureFormat::Jpeg => "jpg",
184            CaptureFormat::Webp => "webp",
185            CaptureFormat::Pdf => "pdf",
186            CaptureFormat::Mhtml => "mhtml",
187            CaptureFormat::Html => "html",
188        }
189    }
190}
191
192/// Page capture functionality
193pub struct PageCapture;
194
195impl PageCapture {
196    /// Capture a page with the given options
197    #[instrument(skip(page))]
198    pub async fn capture(page: &PageHandle, options: &CaptureOptions) -> Result<CaptureResult> {
199        match options.format {
200            CaptureFormat::Png | CaptureFormat::Jpeg | CaptureFormat::Webp => {
201                Self::screenshot(page, options).await
202            }
203            CaptureFormat::Pdf => Self::pdf(page, options).await,
204            CaptureFormat::Mhtml => Self::mhtml(page).await,
205            CaptureFormat::Html => Self::html(page).await,
206        }
207    }
208
209    /// Take a screenshot
210    #[instrument(skip(page))]
211    pub async fn screenshot(page: &PageHandle, options: &CaptureOptions) -> Result<CaptureResult> {
212        info!("Capturing screenshot");
213
214        let format = match options.format {
215            CaptureFormat::Png => CaptureScreenshotFormat::Png,
216            CaptureFormat::Jpeg => CaptureScreenshotFormat::Jpeg,
217            CaptureFormat::Webp => CaptureScreenshotFormat::Webp,
218            _ => CaptureScreenshotFormat::Png,
219        };
220
221        let mut params_builder = ScreenshotParams::builder()
222            .format(format)
223            .from_surface(true)
224            .capture_beyond_viewport(options.full_page);
225
226        // Set quality for JPEG/WebP
227        if matches!(options.format, CaptureFormat::Jpeg | CaptureFormat::Webp) {
228            params_builder = params_builder.quality(options.quality as i64);
229        }
230
231        let params = params_builder.build();
232
233        let data = page
234            .page
235            .screenshot(params)
236            .await
237            .map_err(|e| CaptureError::ScreenshotFailed(e.to_string()))?;
238
239        let size = data.len();
240        debug!("Screenshot captured: {} bytes", size);
241
242        let base64 = if options.as_base64 {
243            Some(BASE64.encode(&data))
244        } else {
245            None
246        };
247
248        Ok(CaptureResult {
249            data,
250            format: options.format,
251            base64,
252            width: options.width,
253            height: options.height,
254            size,
255        })
256    }
257
258    /// Generate a PDF
259    #[instrument(skip(page))]
260    pub async fn pdf(page: &PageHandle, options: &CaptureOptions) -> Result<CaptureResult> {
261        info!("Generating PDF");
262
263        let mut params_builder = PrintToPdfParams::builder()
264            .print_background(true)
265            .prefer_css_page_size(true);
266
267        // Set page size if specified
268        if let (Some(width), Some(height)) = (options.width, options.height) {
269            params_builder = params_builder
270                .paper_width(width as f64 / 96.0) // Convert pixels to inches
271                .paper_height(height as f64 / 96.0);
272        }
273
274        let params = params_builder.build();
275
276        let data = page
277            .page
278            .pdf(params)
279            .await
280            .map_err(|e| CaptureError::PdfFailed(e.to_string()))?;
281
282        let size = data.len();
283        debug!("PDF generated: {} bytes", size);
284
285        let base64 = if options.as_base64 {
286            Some(BASE64.encode(&data))
287        } else {
288            None
289        };
290
291        Ok(CaptureResult {
292            data,
293            format: CaptureFormat::Pdf,
294            base64,
295            width: options.width,
296            height: options.height,
297            size,
298        })
299    }
300
301    /// Capture MHTML archive
302    #[instrument(skip(page))]
303    pub async fn mhtml(page: &PageHandle) -> Result<CaptureResult> {
304        info!("Capturing MHTML");
305
306        let params = CaptureSnapshotParams::builder()
307            .format(CaptureSnapshotFormat::Mhtml)
308            .build();
309
310        let result = page
311            .page
312            .execute(params)
313            .await
314            .map_err(|e| CaptureError::MhtmlFailed(e.to_string()))?;
315
316        let data = result.data.clone().into_bytes();
317        let size = data.len();
318        debug!("MHTML captured: {} bytes", size);
319
320        Ok(CaptureResult {
321            data,
322            format: CaptureFormat::Mhtml,
323            base64: None,
324            width: None,
325            height: None,
326            size,
327        })
328    }
329
330    /// Capture raw HTML
331    #[instrument(skip(page))]
332    pub async fn html(page: &PageHandle) -> Result<CaptureResult> {
333        info!("Capturing HTML");
334
335        let html: String = page
336            .page
337            .evaluate("document.documentElement.outerHTML")
338            .await
339            .map_err(|e| CaptureError::HtmlFailed(e.to_string()))?
340            .into_value()
341            .map_err(|e| CaptureError::HtmlFailed(e.to_string()))?;
342
343        let data = html.into_bytes();
344        let size = data.len();
345        debug!("HTML captured: {} bytes", size);
346
347        Ok(CaptureResult {
348            data,
349            format: CaptureFormat::Html,
350            base64: None,
351            width: None,
352            height: None,
353            size,
354        })
355    }
356
357    /// Capture a specific element
358    #[instrument(skip(page))]
359    pub async fn element_screenshot(
360        page: &PageHandle,
361        selector: &str,
362        format: CaptureFormat,
363    ) -> Result<CaptureResult> {
364        info!("Capturing element: {}", selector);
365
366        let element = page
367            .page
368            .find_element(selector)
369            .await
370            .map_err(|e| CaptureError::ScreenshotFailed(format!("Element not found: {}", e)))?;
371
372        let cdp_format = match format {
373            CaptureFormat::Png => CaptureScreenshotFormat::Png,
374            CaptureFormat::Jpeg => CaptureScreenshotFormat::Jpeg,
375            CaptureFormat::Webp => CaptureScreenshotFormat::Webp,
376            _ => CaptureScreenshotFormat::Png,
377        };
378
379        let data = element
380            .screenshot(cdp_format)
381            .await
382            .map_err(|e| CaptureError::ScreenshotFailed(e.to_string()))?;
383
384        let size = data.len();
385        debug!("Element screenshot captured: {} bytes", size);
386
387        Ok(CaptureResult {
388            data,
389            format,
390            base64: None,
391            width: None,
392            height: None,
393            size,
394        })
395    }
396}
397
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `CaptureResult` around `data` with no base64 and no
    /// dimensions; `size` is taken from the data length.
    fn result_with(format: CaptureFormat, data: Vec<u8>) -> CaptureResult {
        CaptureResult {
            size: data.len(),
            data,
            format,
            base64: None,
            width: None,
            height: None,
        }
    }

    // ========================================================================
    // CaptureOptions Tests
    // ========================================================================

    #[test]
    fn test_capture_options_default() {
        let CaptureOptions {
            format,
            quality,
            full_page,
            width,
            height,
            clip_selector,
            as_base64,
        } = CaptureOptions::default();

        assert_eq!(format, CaptureFormat::Png);
        assert_eq!(quality, 85);
        assert!(full_page);
        assert!(!as_base64);
        assert!(width.is_none());
        assert!(height.is_none());
        assert!(clip_selector.is_none());
    }

    #[test]
    fn test_capture_format_factories() {
        assert_eq!(CaptureOptions::png().format, CaptureFormat::Png);

        let jpeg = CaptureOptions::jpeg(90);
        assert_eq!(jpeg.format, CaptureFormat::Jpeg);
        assert_eq!(jpeg.quality, 90);

        assert_eq!(CaptureOptions::pdf().format, CaptureFormat::Pdf);
        assert_eq!(CaptureOptions::mhtml().format, CaptureFormat::Mhtml);
        assert_eq!(CaptureOptions::html().format, CaptureFormat::Html);
    }

    #[test]
    fn test_validate_capture_request_valid() {
        let opts = CaptureOptions {
            format: CaptureFormat::Png,
            quality: 85,
            full_page: true,
            width: Some(1920),
            height: Some(1080),
            clip_selector: None,
            as_base64: false,
        };
        assert!(opts.validate().is_ok());
    }

    #[test]
    fn test_validate_capture_request_valid_minimal() {
        assert!(CaptureOptions::default().validate().is_ok());
    }

    #[test]
    fn test_validate_capture_request_quality_too_high() {
        let outcome = CaptureOptions {
            quality: 101,
            ..Default::default()
        }
        .validate();
        assert!(outcome.is_err());
        assert!(outcome.unwrap_err().contains("Quality"));
    }

    #[test]
    fn test_validate_capture_request_width_too_large() {
        let outcome = CaptureOptions {
            width: Some(20000),
            ..Default::default()
        }
        .validate();
        assert!(outcome.is_err());
        assert!(outcome.unwrap_err().contains("Width"));
    }

    #[test]
    fn test_validate_capture_request_height_zero() {
        let outcome = CaptureOptions {
            height: Some(0),
            ..Default::default()
        }
        .validate();
        assert!(outcome.is_err());
        assert!(outcome.unwrap_err().contains("Height"));
    }

    #[test]
    fn test_validate_capture_request_max_dimensions() {
        let opts = CaptureOptions {
            width: Some(16384),
            height: Some(16384),
            ..Default::default()
        };
        assert!(opts.validate().is_ok());
    }

    // ========================================================================
    // CaptureResult Tests
    // ========================================================================

    #[test]
    fn test_capture_result_mime_type() {
        let expectations = [
            (CaptureFormat::Png, "image/png"),
            (CaptureFormat::Jpeg, "image/jpeg"),
            (CaptureFormat::Webp, "image/webp"),
            (CaptureFormat::Pdf, "application/pdf"),
            (CaptureFormat::Mhtml, "multipart/related"),
            (CaptureFormat::Html, "text/html"),
        ];

        for (format, expected_mime) in expectations {
            assert_eq!(result_with(format, vec![]).mime_type(), expected_mime);
        }
    }

    #[test]
    fn test_capture_result_extension() {
        let expectations = [
            (CaptureFormat::Png, "png"),
            (CaptureFormat::Jpeg, "jpg"),
            (CaptureFormat::Webp, "webp"),
            (CaptureFormat::Pdf, "pdf"),
            (CaptureFormat::Mhtml, "mhtml"),
            (CaptureFormat::Html, "html"),
        ];

        for (format, expected_ext) in expectations {
            assert_eq!(result_with(format, vec![]).extension(), expected_ext);
        }
    }

    #[test]
    fn test_capture_result_base64() {
        let result = result_with(CaptureFormat::Png, b"hello".to_vec());
        assert_eq!(result.to_base64(), "aGVsbG8=");
    }

    #[test]
    fn test_capture_result_base64_empty() {
        let result = result_with(CaptureFormat::Png, vec![]);
        assert_eq!(result.to_base64(), "");
    }

    #[test]
    fn test_capture_result_base64_binary() {
        // PNG header bytes
        let header = vec![0x89, 0x50, 0x4E, 0x47];
        let encoded = result_with(CaptureFormat::Png, header.clone()).to_base64();
        assert!(!encoded.is_empty());

        // The encoding must decode back to the original bytes.
        let decoded = BASE64.decode(&encoded).unwrap();
        assert_eq!(decoded, header);
    }

    #[test]
    fn test_capture_result_with_dimensions() {
        let result = CaptureResult {
            width: Some(1920),
            height: Some(1080),
            ..result_with(CaptureFormat::Png, vec![1, 2, 3])
        };
        assert_eq!(result.width, Some(1920));
        assert_eq!(result.height, Some(1080));
        assert_eq!(result.size, 3);
    }

    #[test]
    fn test_capture_result_with_precomputed_base64() {
        let result = CaptureResult {
            base64: Some("precomputed".to_string()),
            ..result_with(CaptureFormat::Png, b"hello".to_vec())
        };
        // The stored field is independent of what to_base64() computes...
        assert_eq!(result.base64, Some("precomputed".to_string()));
        // ...and to_base64() always encodes the data itself.
        assert_eq!(result.to_base64(), "aGVsbG8=");
    }

    // ========================================================================
    // CaptureFormat Tests
    // ========================================================================

    #[test]
    fn test_capture_format_default() {
        assert_eq!(CaptureFormat::default(), CaptureFormat::Png);
    }

    #[test]
    fn test_capture_format_serialization() {
        let json = serde_json::to_string(&CaptureFormat::Jpeg).unwrap();
        assert_eq!(json, "\"jpeg\"");

        let round_tripped: CaptureFormat = serde_json::from_str(&json).unwrap();
        assert_eq!(round_tripped, CaptureFormat::Jpeg);
    }

    #[test]
    fn test_capture_format_all_variants_serialize() {
        let expectations = [
            (CaptureFormat::Png, "\"png\""),
            (CaptureFormat::Jpeg, "\"jpeg\""),
            (CaptureFormat::Webp, "\"webp\""),
            (CaptureFormat::Pdf, "\"pdf\""),
            (CaptureFormat::Mhtml, "\"mhtml\""),
            (CaptureFormat::Html, "\"html\""),
        ];

        for (format, expected_json) in expectations {
            assert_eq!(serde_json::to_string(&format).unwrap(), expected_json);
        }
    }

    #[test]
    fn test_capture_format_equality() {
        assert_eq!(CaptureFormat::Png, CaptureFormat::Png);
        assert_ne!(CaptureFormat::Png, CaptureFormat::Jpeg);
    }

    #[test]
    fn test_capture_format_clone() {
        let original = CaptureFormat::Webp;
        let duplicate = original;
        assert_eq!(original, duplicate);
    }

    // ========================================================================
    // CaptureOptions Serialization Tests
    // ========================================================================

    #[test]
    fn test_capture_options_serialization() {
        let opts = CaptureOptions {
            format: CaptureFormat::Jpeg,
            quality: 90,
            full_page: false,
            width: Some(800),
            height: Some(600),
            clip_selector: Some("#main".to_string()),
            as_base64: true,
        };

        let json = serde_json::to_string(&opts).unwrap();
        for fragment in ["\"jpeg\"", "90", "#main"] {
            assert!(json.contains(fragment));
        }

        let round_tripped: CaptureOptions = serde_json::from_str(&json).unwrap();
        assert_eq!(round_tripped.format, CaptureFormat::Jpeg);
        assert_eq!(round_tripped.quality, 90);
        assert!(!round_tripped.full_page);
        assert!(round_tripped.as_base64);
    }

    #[test]
    fn test_capture_options_deserialize_with_defaults() {
        let opts: CaptureOptions = serde_json::from_str(r#"{"format": "png"}"#).unwrap();

        // Omitted fields fall back to their defaults.
        assert_eq!(opts.format, CaptureFormat::Png);
        assert_eq!(opts.quality, 85); // default
        assert!(opts.full_page); // default true
        assert!(!opts.as_base64); // default false
    }

    #[test]
    fn test_capture_options_jpeg_quality_boundary() {
        for quality in [0u8, 100u8] {
            let opts = CaptureOptions::jpeg(quality);
            assert_eq!(opts.quality, quality);
            assert!(opts.validate().is_ok());
        }
    }
}