reasonkit/web_interface.rs
1//! Web Browser Interface Trait for ReasonKit Core
2//!
3//! This module defines the abstraction layer for reasonkit-core to interact with
4//! reasonkit-web's browser automation and content extraction capabilities.
5//!
6//! # Architecture
7//!
8//! ```text
9//! ReasonKit Core --> WebBrowserAdapter --> ReasonKit Web
10//! (trait interface)
11//! |- navigate()
12//! |- extract_content()
13//! `- capture_screenshot()
14//! ```
15//!
16//! # Design Principles
17//!
18//! - **Async-First**: All operations use `async-trait` for future compatibility
19//! - **Type-Safe**: Strong types for URLs, content, and capture formats
20//! - **Error Handling**: Comprehensive error types via `thiserror`
21//! - **Flexible Implementations**: Support local MCP server, FFI, or HTTP bindings
22//! - **Performance**: Connection pooling and caching by default
23//!
24//! # Example
25//!
//! ```rust,no_run
//! use reasonkit::web_interface::{
//!     CaptureFormat, CaptureOptions, ExtractOptions, NavigateOptions, WebBrowserAdapter,
//! };
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     // Implementation will use concrete adapter (e.g., McpWebAdapter, HttpWebAdapter).
//!     // The binding must be `mut`: every trait operation takes `&mut self`.
//!     // let mut adapter = McpWebAdapter::new(config).await?;
//!     // adapter.connect().await?;
//!
//!     // Navigate to URL
//!     // let page = adapter.navigate(
//!     //     "https://example.com",
//!     //     NavigateOptions::default(),
//!     // ).await?;
//!
//!     // Extract main content
//!     // let content = adapter.extract_content(
//!     //     &page,
//!     //     ExtractOptions::default(),
//!     // ).await?;
//!
//!     // Capture screenshot
//!     // let screenshot = adapter.capture_screenshot(
//!     //     &page,
//!     //     CaptureOptions::default().format(CaptureFormat::Png),
//!     // ).await?;
//!
//!     Ok(())
//! }
//! ```
55
56use async_trait::async_trait;
57use serde::{Deserialize, Serialize};
58use std::collections::HashMap;
59use std::fmt;
60use thiserror::Error;
61
62// =============================================================================
63// ERROR TYPES
64// =============================================================================
65
66/// Web browser adapter error types
67#[derive(Error, Debug)]
68pub enum WebAdapterError {
69 /// Navigation to URL failed
70 #[error("Navigation failed: {message}")]
71 NavigationFailed { message: String },
72
73 /// Content extraction failed
74 #[error("Content extraction failed: {message}")]
75 ExtractionFailed { message: String },
76
77 /// Screenshot/capture failed
78 #[error("Capture failed: {format:?} - {message}")]
79 CaptureFailed {
80 format: CaptureFormat,
81 message: String,
82 },
83
84 /// Page navigation timed out
85 #[error("Navigation timeout after {0}ms")]
86 NavigationTimeout(u64),
87
88 /// Invalid URL provided
89 #[error("Invalid URL: {0}")]
90 InvalidUrl(String),
91
92 /// Adapter not connected
93 #[error("Adapter not connected to web service")]
94 NotConnected,
95
96 /// Connection lost
97 #[error("Connection to web service lost")]
98 ConnectionLost,
99
100 /// Selector parsing error
101 #[error("Invalid CSS selector: {0}")]
102 InvalidSelector(String),
103
104 /// JavaScript execution failed
105 #[error("JavaScript execution failed: {message}")]
106 JavaScriptError { message: String },
107
108 /// Unsupported capture format for current implementation
109 #[error("Capture format not supported: {0:?}")]
110 UnsupportedFormat(CaptureFormat),
111
112 /// Resource not found (HTTP 404, etc.)
113 #[error("Resource not found: {0}")]
114 NotFound(String),
115
116 /// Network error
117 #[error("Network error: {0}")]
118 Network(String),
119
120 /// Serialization/deserialization error
121 #[error("Serialization error: {0}")]
122 Serialization(String),
123
124 /// Generic adapter error
125 #[error("{0}")]
126 Generic(String),
127}
128
129/// Result type alias for web adapter operations
130pub type WebAdapterResult<T> = std::result::Result<T, WebAdapterError>;
131
132// =============================================================================
133// DATA TYPES
134// =============================================================================
135
136/// Represents a page/tab in the browser
137#[derive(Debug, Clone, Default, Serialize, Deserialize)]
138pub struct PageHandle {
139 /// Unique page identifier
140 pub id: String,
141 /// Current page URL
142 pub url: String,
143 /// Page title
144 pub title: String,
145 /// Whether page is still valid/accessible
146 pub is_active: bool,
147}
148
149impl fmt::Display for PageHandle {
150 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
151 write!(f, "Page({}: {})", self.id, self.url)
152 }
153}
154
155/// Options for page navigation
156#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct NavigateOptions {
158 /// Maximum time to wait for page load (milliseconds)
159 /// Default: 30000 (30 seconds)
160 pub timeout_ms: u64,
161
162 /// Wait until event
163 /// Possible values: "load", "domcontentloaded", "networkidle"
164 /// Default: "load"
165 pub wait_until: NavigateWaitEvent,
166
167 /// JavaScript to execute after page load
168 pub inject_js: Option<String>,
169
170 /// Headers to send with navigation request
171 pub headers: HashMap<String, String>,
172
173 /// User agent override
174 pub user_agent: Option<String>,
175
176 /// Viewport dimensions (width, height)
177 pub viewport: Option<(u32, u32)>,
178
179 /// Follow redirects (default: true)
180 pub follow_redirects: bool,
181}
182
183impl Default for NavigateOptions {
184 fn default() -> Self {
185 Self {
186 timeout_ms: 30000,
187 wait_until: NavigateWaitEvent::Load,
188 inject_js: None,
189 headers: HashMap::new(),
190 user_agent: None,
191 viewport: None,
192 follow_redirects: true,
193 }
194 }
195}
196
197/// Page load wait event
198#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
199#[serde(rename_all = "lowercase")]
200pub enum NavigateWaitEvent {
201 /// Wait for page load event
202 #[default]
203 Load,
204 /// Wait for DOM content loaded event
205 DomContentLoaded,
206 /// Wait for network to idle (no pending requests)
207 NetworkIdle,
208}
209
210impl fmt::Display for NavigateWaitEvent {
211 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
212 match self {
213 Self::Load => write!(f, "load"),
214 Self::DomContentLoaded => write!(f, "domcontentloaded"),
215 Self::NetworkIdle => write!(f, "networkidle"),
216 }
217 }
218}
219
220/// Extracted content from a page
221#[derive(Debug, Clone, Default, Serialize, Deserialize)]
222pub struct ExtractedContent {
223 /// Main body text
224 pub text: String,
225
226 /// Extracted HTML (optional, if structured extraction requested)
227 pub html: Option<String>,
228
229 /// Extracted links
230 pub links: Vec<ExtractedLink>,
231
232 /// Extracted images
233 pub images: Vec<ExtractedImage>,
234
235 /// Extracted metadata
236 pub metadata: ContentMetadata,
237
238 /// Structured data (JSON-LD, microdata, etc.)
239 pub structured_data: Option<serde_json::Value>,
240
241 /// Language detection
242 pub language: Option<String>,
243
244 /// Extraction confidence (0.0-1.0)
245 pub confidence: f32,
246}
247
248/// Extracted link from content
249#[derive(Debug, Clone, Default, Serialize, Deserialize)]
250pub struct ExtractedLink {
251 /// Link text
252 pub text: String,
253 /// Link URL
254 pub href: String,
255 /// Link title attribute
256 pub title: Option<String>,
257}
258
259/// Extracted image from content
260#[derive(Debug, Clone, Default, Serialize, Deserialize)]
261pub struct ExtractedImage {
262 /// Image URL
263 pub src: String,
264 /// Alt text
265 pub alt: Option<String>,
266 /// Image title
267 pub title: Option<String>,
268}
269
270/// Content metadata
271#[derive(Debug, Clone, Default, Serialize, Deserialize)]
272pub struct ContentMetadata {
273 /// Page title
274 pub title: Option<String>,
275 /// Page description
276 pub description: Option<String>,
277 /// Open Graph image
278 pub og_image: Option<String>,
279 /// Open Graph title
280 pub og_title: Option<String>,
281 /// Content type (text/html, application/json, etc.)
282 pub content_type: Option<String>,
283 /// Character encoding
284 pub charset: Option<String>,
285 /// Author
286 pub author: Option<String>,
287 /// Publication date
288 pub publish_date: Option<String>,
289 /// Custom meta tags
290 pub custom_meta: HashMap<String, String>,
291}
292
293/// Options for content extraction
294#[derive(Debug, Clone, Serialize, Deserialize)]
295pub struct ExtractOptions {
296 /// CSS selector for main content area (optional, auto-detect if not specified)
297 pub content_selector: Option<String>,
298
299 /// Extract links (default: true)
300 pub extract_links: bool,
301
302 /// Extract images (default: false)
303 pub extract_images: bool,
304
305 /// Extract structured data (default: false)
306 pub extract_structured_data: bool,
307
308 /// Remove script and style tags (default: true)
309 pub remove_scripts: bool,
310
311 /// Minimum text length to include (default: 20 chars)
312 pub min_text_length: usize,
313
314 /// Detect language (default: false)
315 pub detect_language: bool,
316
317 /// Custom JavaScript to execute for extraction
318 pub custom_js: Option<String>,
319}
320
321impl Default for ExtractOptions {
322 fn default() -> Self {
323 Self {
324 content_selector: None,
325 extract_links: true,
326 extract_images: false,
327 extract_structured_data: false,
328 remove_scripts: true,
329 min_text_length: 20,
330 detect_language: false,
331 custom_js: None,
332 }
333 }
334}
335
336/// Screenshot/capture format
337#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize, Hash)]
338#[serde(rename_all = "lowercase")]
339pub enum CaptureFormat {
340 /// PNG image (lossless, recommended)
341 #[default]
342 Png,
343 /// JPEG image (compressed, smaller file size)
344 Jpeg,
345 /// PDF document
346 Pdf,
347 /// MHTML archive (page + resources)
348 Mhtml,
349 /// Full HTML source
350 Html,
351 /// WebP image (modern, good compression)
352 Webp,
353}
354
355impl fmt::Display for CaptureFormat {
356 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
357 match self {
358 Self::Png => write!(f, "png"),
359 Self::Jpeg => write!(f, "jpeg"),
360 Self::Pdf => write!(f, "pdf"),
361 Self::Mhtml => write!(f, "mhtml"),
362 Self::Html => write!(f, "html"),
363 Self::Webp => write!(f, "webp"),
364 }
365 }
366}
367
368/// Captured page content/screenshot
369#[derive(Debug, Clone, Default, Serialize, Deserialize)]
370pub struct CapturedPage {
371 /// Format of captured content
372 pub format: CaptureFormat,
373
374 /// Raw captured data (PNG bytes, PDF bytes, HTML string, etc.)
375 pub data: Vec<u8>,
376
377 /// MIME type
378 pub mime_type: String,
379
380 /// File size in bytes
381 pub size_bytes: usize,
382
383 /// Capture metadata
384 pub metadata: CaptureMetadata,
385}
386
387impl CapturedPage {
388 /// Get captured data as string (for text formats)
389 pub fn as_string(&self) -> WebAdapterResult<String> {
390 String::from_utf8(self.data.clone())
391 .map_err(|e| WebAdapterError::Serialization(e.to_string()))
392 }
393}
394
395/// Capture metadata
396#[derive(Debug, Clone, Default, Serialize, Deserialize)]
397pub struct CaptureMetadata {
398 /// Page URL that was captured
399 pub url: String,
400
401 /// Page title at time of capture
402 pub title: Option<String>,
403
404 /// Viewport width used for capture
405 pub viewport_width: u32,
406
407 /// Viewport height used for capture
408 pub viewport_height: u32,
409
410 /// Whether full page was captured (vs viewport)
411 pub full_page: bool,
412
413 /// Device scale factor (1.0 for normal, 2.0 for retina, etc.)
414 pub device_scale_factor: f32,
415}
416
417/// Options for page capture/screenshot
418#[derive(Debug, Clone, Serialize, Deserialize)]
419pub struct CaptureOptions {
420 /// Capture format (default: PNG)
421 pub format: CaptureFormat,
422
423 /// Capture full page or just viewport (default: true for PNG/JPEG, false for PDF)
424 pub full_page: bool,
425
426 /// Timeout for capture (milliseconds, default: 10000)
427 pub timeout_ms: u64,
428
429 /// Quality for JPEG/WebP (0-100, default: 80)
430 pub quality: Option<u8>,
431
432 /// Omit background (PNG only, default: false)
433 pub omit_background: bool,
434
435 /// Device scale factor (default: 1.0)
436 pub device_scale_factor: Option<f32>,
437
438 /// Wait delay before capture (milliseconds, default: 0)
439 pub delay_ms: u64,
440
441 /// JavaScript to execute before capture
442 pub execute_js: Option<String>,
443}
444
445impl Default for CaptureOptions {
446 fn default() -> Self {
447 Self {
448 format: CaptureFormat::Png,
449 full_page: true,
450 timeout_ms: 10000,
451 quality: Some(80),
452 omit_background: false,
453 device_scale_factor: None,
454 delay_ms: 0,
455 execute_js: None,
456 }
457 }
458}
459
460impl CaptureOptions {
461 /// Set capture format
462 pub fn format(mut self, format: CaptureFormat) -> Self {
463 self.format = format;
464 self
465 }
466
467 /// Set full page capture
468 pub fn full_page(mut self, full: bool) -> Self {
469 self.full_page = full;
470 self
471 }
472
473 /// Set quality (for JPEG/WebP)
474 pub fn quality(mut self, quality: u8) -> Self {
475 self.quality = Some(quality.min(100));
476 self
477 }
478
479 /// Set timeout
480 pub fn timeout_ms(mut self, timeout: u64) -> Self {
481 self.timeout_ms = timeout;
482 self
483 }
484}
485
486// =============================================================================
487// TRAIT DEFINITION
488// =============================================================================
489
490/// Web browser adapter trait for reasonkit-core
491///
492/// Provides abstraction for browser automation and content extraction.
493/// Implementations can use MCP servers, HTTP binding, FFI, or other mechanisms
494/// to communicate with reasonkit-web.
495///
496/// # Implementing the Trait
497///
498/// - **McpWebAdapter**: Uses MCP stdio server (local or remote)
499/// - **HttpWebAdapter**: Uses HTTP JSON-RPC binding
500/// - **LocalWebAdapter**: Direct FFI binding for same-process usage
501///
502/// # Connection Lifecycle
503///
504/// 1. Create adapter with configuration
505/// 2. Call `connect()` to establish connection
506/// 3. Use navigation/extraction/capture methods
507/// 4. Call `disconnect()` when done
508///
509/// Implementations MUST handle reconnection automatically on transient failures.
510#[async_trait]
511pub trait WebBrowserAdapter: Send + Sync {
512 // -------------------------------------------------------------------------
513 // LIFECYCLE
514 // -------------------------------------------------------------------------
515
516 /// Initialize and connect to the web browser service
517 ///
518 /// # Errors
519 ///
520 /// Returns `WebAdapterError::NotConnected` if service is unavailable.
521 ///
522 /// # Implementation Notes
523 ///
524 /// - May start a browser process (headless Chrome, etc.)
525 /// - May connect to an existing MCP server
526 /// - May verify API compatibility
527 /// - Should implement connection pooling if needed
528 async fn connect(&mut self) -> WebAdapterResult<()>;
529
530 /// Disconnect from web service and cleanup resources
531 ///
532 /// # Implementation Notes
533 ///
534 /// - Should close browser processes gracefully
535 /// - Should save cache/session state if applicable
536 /// - Idempotent (safe to call multiple times)
537 async fn disconnect(&mut self) -> WebAdapterResult<()>;
538
539 /// Check if adapter is currently connected
540 fn is_connected(&self) -> bool;
541
542 // -------------------------------------------------------------------------
543 // NAVIGATION
544 // -------------------------------------------------------------------------
545
546 /// Navigate to a URL and return a page handle
547 ///
548 /// # Arguments
549 ///
550 /// * `url` - Target URL to navigate to
551 /// * `options` - Navigation options (timeout, wait event, etc.)
552 ///
553 /// # Errors
554 ///
555 /// Returns:
556 /// - `WebAdapterError::InvalidUrl` if URL is malformed
557 /// - `WebAdapterError::NavigationTimeout` if timeout exceeded
558 /// - `WebAdapterError::NavigationFailed` for other failures (network, SSL, 404, etc.)
559 ///
560 /// # Implementation Notes
561 ///
562 /// - MUST validate URL before navigation
563 /// - MUST respect timeout_ms in options
564 /// - MUST wait for specified event (load, domcontentloaded, etc.)
565 /// - SHOULD respect custom headers and user agent if provided
566 /// - SHOULD inject JavaScript after load if provided
567 /// - SHOULD handle redirects according to options
568 /// - MUST return page handle with unique ID and current URL
569 async fn navigate(
570 &mut self,
571 url: &str,
572 options: NavigateOptions,
573 ) -> WebAdapterResult<PageHandle>;
574
575 /// Go back in browser history
576 ///
577 /// # Errors
578 ///
579 /// Returns `WebAdapterError::NavigationFailed` if unable to go back.
580 async fn go_back(&mut self) -> WebAdapterResult<PageHandle>;
581
582 /// Go forward in browser history
583 ///
584 /// # Errors
585 ///
586 /// Returns `WebAdapterError::NavigationFailed` if unable to go forward.
587 async fn go_forward(&mut self) -> WebAdapterResult<PageHandle>;
588
589 /// Reload current page
590 ///
591 /// # Errors
592 ///
593 /// Returns `WebAdapterError::NavigationFailed` if unable to reload.
594 async fn reload(&mut self) -> WebAdapterResult<PageHandle>;
595
596 // -------------------------------------------------------------------------
597 // CONTENT EXTRACTION
598 // -------------------------------------------------------------------------
599
600 /// Extract content from a page
601 ///
602 /// # Arguments
603 ///
604 /// * `page` - Page handle to extract from
605 /// * `options` - Extraction options
606 ///
607 /// # Errors
608 ///
609 /// Returns:
610 /// - `WebAdapterError::ExtractionFailed` if extraction fails
611 /// - `WebAdapterError::InvalidSelector` if custom selector is invalid
612 /// - `WebAdapterError::JavaScriptError` if custom JS fails
613 ///
614 /// # Implementation Notes
615 ///
616 /// - MUST extract main content text
617 /// - SHOULD auto-detect main content area if no selector provided
618 /// - SHOULD extract links and images according to options
619 /// - SHOULD execute custom JavaScript if provided
620 /// - SHOULD detect language if requested
621 /// - MUST include confidence score (0.0-1.0)
622 /// - SHOULD normalize whitespace
623 /// - SHOULD extract metadata (title, description, og tags)
624 async fn extract_content(
625 &mut self,
626 page: &PageHandle,
627 options: ExtractOptions,
628 ) -> WebAdapterResult<ExtractedContent>;
629
630 /// Execute custom JavaScript on page
631 ///
632 /// # Arguments
633 ///
634 /// * `page` - Page to execute on
635 /// * `script` - JavaScript code to execute
636 ///
637 /// # Returns
638 ///
639 /// Serialized result of JavaScript execution as JSON value
640 ///
641 /// # Errors
642 ///
643 /// Returns `WebAdapterError::JavaScriptError` if execution fails.
644 ///
645 /// # Implementation Notes
646 ///
647 /// - MUST timeout execution if it takes too long (>30s)
648 /// - MUST return JSON-serializable result
649 /// - SHOULD return last expression value
650 async fn execute_js(
651 &mut self,
652 page: &PageHandle,
653 script: &str,
654 ) -> WebAdapterResult<serde_json::Value>;
655
656 /// Get text content using CSS selector
657 ///
658 /// # Arguments
659 ///
660 /// * `page` - Page to query
661 /// * `selector` - CSS selector
662 ///
663 /// # Errors
664 ///
665 /// Returns:
666 /// - `WebAdapterError::InvalidSelector` if selector is invalid
667 /// - `WebAdapterError::ExtractionFailed` if element not found
668 async fn get_text(&mut self, page: &PageHandle, selector: &str) -> WebAdapterResult<String>;
669
670 // -------------------------------------------------------------------------
671 // SCREENSHOTS & CAPTURE
672 // -------------------------------------------------------------------------
673
674 /// Capture page as screenshot or document
675 ///
676 /// # Arguments
677 ///
678 /// * `page` - Page to capture
679 /// * `options` - Capture options (format, quality, etc.)
680 ///
681 /// # Errors
682 ///
683 /// Returns:
684 /// - `WebAdapterError::CaptureFailed` if capture fails
685 /// - `WebAdapterError::UnsupportedFormat` if format not supported
686 ///
687 /// # Implementation Notes
688 ///
689 /// - MUST support PNG, JPEG, PDF formats at minimum
690 /// - MAY support MHTML, WebP if available
691 /// - MUST respect quality setting for JPEG/WebP
692 /// - MUST capture full page or viewport according to options
693 /// - SHOULD wait for delay_ms before capturing
694 /// - SHOULD execute custom JavaScript before capture if provided
695 /// - MUST include capture metadata (viewport size, scale factor, etc.)
696 async fn capture_screenshot(
697 &mut self,
698 page: &PageHandle,
699 options: CaptureOptions,
700 ) -> WebAdapterResult<CapturedPage>;
701
702 // -------------------------------------------------------------------------
703 // DIAGNOSTICS
704 // -------------------------------------------------------------------------
705
706 /// Get connection status and diagnostics
707 ///
708 /// # Returns
709 ///
710 /// JSON object with connection info, statistics, etc.
711 fn diagnostics(&self) -> serde_json::Value;
712
713 /// Get adapter name (for logging/debugging)
714 fn name(&self) -> &str;
715
716 /// Get adapter version
717 fn version(&self) -> &str;
718}
719
#[cfg(test)]
mod tests {
    use super::*;

    /// Default navigation options: 30 s timeout, wait for `load`, follow redirects.
    #[test]
    fn test_navigate_options_default() {
        let defaults = NavigateOptions::default();
        assert!(defaults.follow_redirects);
        assert_eq!(defaults.wait_until, NavigateWaitEvent::Load);
        assert_eq!(defaults.timeout_ms, 30000);
    }

    /// Chained builder calls each land in their own field.
    #[test]
    fn test_capture_options_builder() {
        let built = CaptureOptions::default()
            .format(CaptureFormat::Jpeg)
            .quality(90)
            .full_page(false);

        assert!(!built.full_page);
        assert_eq!(built.quality, Some(90));
        assert_eq!(built.format, CaptureFormat::Jpeg);
    }

    /// `Display` for capture formats yields the lowercase name.
    #[test]
    fn test_capture_format_display() {
        let expectations = [
            (CaptureFormat::Png, "png"),
            (CaptureFormat::Jpeg, "jpeg"),
            (CaptureFormat::Pdf, "pdf"),
        ];
        for (format, expected) in expectations {
            assert_eq!(format.to_string(), expected);
        }
    }

    /// `Display` for a page handle shows the id and the URL only.
    #[test]
    fn test_page_handle_display() {
        let handle = PageHandle {
            id: String::from("page-1"),
            url: String::from("https://example.com"),
            title: String::from("Example"),
            is_active: true,
        };

        assert_eq!(handle.to_string(), "Page(page-1: https://example.com)");
    }

    /// Default extraction options: links on, images and structured data off,
    /// scripts removed.
    #[test]
    fn test_extract_options_default() {
        let defaults = ExtractOptions::default();
        assert!(defaults.remove_scripts);
        assert!(defaults.extract_links);
        assert!(!defaults.extract_structured_data);
        assert!(!defaults.extract_images);
    }

    /// Default metadata has no title and no custom meta tags.
    #[test]
    fn test_content_metadata_default() {
        let empty = ContentMetadata::default();
        assert!(empty.custom_meta.is_empty());
        assert!(empty.title.is_none());
    }

    /// `Display` for wait events yields the lowercase event name.
    #[test]
    fn test_navigate_wait_event_display() {
        let expectations = [
            (NavigateWaitEvent::Load, "load"),
            (NavigateWaitEvent::DomContentLoaded, "domcontentloaded"),
            (NavigateWaitEvent::NetworkIdle, "networkidle"),
        ];
        for (event, expected) in expectations {
            assert_eq!(event.to_string(), expected);
        }
    }

    /// Quality values above 100 are clamped to 100.
    #[test]
    fn test_quality_clamping() {
        let clamped = CaptureOptions::default().quality(150);
        assert_eq!(clamped.quality, Some(100));
    }

    /// A capture holding valid UTF-8 decodes back to the original text.
    #[test]
    fn test_capture_page_as_string() {
        let metadata = CaptureMetadata {
            url: String::from("https://example.com"),
            title: None,
            viewport_width: 1024,
            viewport_height: 768,
            full_page: false,
            device_scale_factor: 1.0,
        };
        let captured = CapturedPage {
            format: CaptureFormat::Html,
            data: "<html>test</html>".as_bytes().to_vec(),
            mime_type: String::from("text/html"),
            size_bytes: 17,
            metadata,
        };

        let decoded = captured.as_string();
        assert!(decoded.is_ok());
        assert_eq!(decoded.unwrap(), "<html>test</html>");
    }
}
815}