reasonkit/
web_interface.rs

1//! Web Browser Interface Trait for ReasonKit Core
2//!
3//! This module defines the abstraction layer for reasonkit-core to interact with
4//! reasonkit-web's browser automation and content extraction capabilities.
5//!
6//! # Architecture
7//!
8//! ```text
9//! ReasonKit Core --> WebBrowserAdapter --> ReasonKit Web
10//!                   (trait interface)
11//!        |- navigate()
12//!        |- extract_content()
13//!        `- capture_screenshot()
14//! ```
15//!
16//! # Design Principles
17//!
18//! - **Async-First**: All operations use `async-trait` for future compatibility
19//! - **Type-Safe**: Strong types for URLs, content, and capture formats
20//! - **Error Handling**: Comprehensive error types via `thiserror`
21//! - **Flexible Implementations**: Support local MCP server, FFI, or HTTP bindings
22//! - **Performance**: Connection pooling and caching by default
23//!
24//! # Example
25//!
26//! ```rust,no_run
27//! use reasonkit::web_interface::{WebBrowserAdapter, NavigateOptions, CaptureFormat};
28//!
29//! #[tokio::main]
30//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
31//!     // Implementation will use concrete adapter (e.g., McpWebAdapter, HttpWebAdapter)
32//!     // let adapter = McpWebAdapter::new(config).await?;
33//!
34//!     // Navigate to URL
35//!     // let page = adapter.navigate(
36//!     //     "https://example.com",
37//!     //     NavigateOptions::default(),
38//!     // ).await?;
39//!
40//!     // Extract main content
41//!     // let content = adapter.extract_content(
42//!     //     &page,
43//!     //     ExtractOptions::default(),
44//!     // ).await?;
45//!
46//!     // Capture screenshot
47//!     // let screenshot = adapter.capture_screenshot(
48//!     //     &page,
49//!     //     CaptureOptions::default().format(CaptureFormat::Png),
50//!     // ).await?;
51//!
52//!     Ok(())
53//! }
54//! ```
55
56use async_trait::async_trait;
57use serde::{Deserialize, Serialize};
58use std::collections::HashMap;
59use std::fmt;
60use thiserror::Error;
61
62// =============================================================================
63// ERROR TYPES
64// =============================================================================
65
66/// Web browser adapter error types
67#[derive(Error, Debug)]
68pub enum WebAdapterError {
69    /// Navigation to URL failed
70    #[error("Navigation failed: {message}")]
71    NavigationFailed { message: String },
72
73    /// Content extraction failed
74    #[error("Content extraction failed: {message}")]
75    ExtractionFailed { message: String },
76
77    /// Screenshot/capture failed
78    #[error("Capture failed: {format:?} - {message}")]
79    CaptureFailed {
80        format: CaptureFormat,
81        message: String,
82    },
83
84    /// Page navigation timed out
85    #[error("Navigation timeout after {0}ms")]
86    NavigationTimeout(u64),
87
88    /// Invalid URL provided
89    #[error("Invalid URL: {0}")]
90    InvalidUrl(String),
91
92    /// Adapter not connected
93    #[error("Adapter not connected to web service")]
94    NotConnected,
95
96    /// Connection lost
97    #[error("Connection to web service lost")]
98    ConnectionLost,
99
100    /// Selector parsing error
101    #[error("Invalid CSS selector: {0}")]
102    InvalidSelector(String),
103
104    /// JavaScript execution failed
105    #[error("JavaScript execution failed: {message}")]
106    JavaScriptError { message: String },
107
108    /// Unsupported capture format for current implementation
109    #[error("Capture format not supported: {0:?}")]
110    UnsupportedFormat(CaptureFormat),
111
112    /// Resource not found (HTTP 404, etc.)
113    #[error("Resource not found: {0}")]
114    NotFound(String),
115
116    /// Network error
117    #[error("Network error: {0}")]
118    Network(String),
119
120    /// Serialization/deserialization error
121    #[error("Serialization error: {0}")]
122    Serialization(String),
123
124    /// Generic adapter error
125    #[error("{0}")]
126    Generic(String),
127}
128
129/// Result type alias for web adapter operations
130pub type WebAdapterResult<T> = std::result::Result<T, WebAdapterError>;
131
132// =============================================================================
133// DATA TYPES
134// =============================================================================
135
136/// Represents a page/tab in the browser
137#[derive(Debug, Clone, Default, Serialize, Deserialize)]
138pub struct PageHandle {
139    /// Unique page identifier
140    pub id: String,
141    /// Current page URL
142    pub url: String,
143    /// Page title
144    pub title: String,
145    /// Whether page is still valid/accessible
146    pub is_active: bool,
147}
148
149impl fmt::Display for PageHandle {
150    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
151        write!(f, "Page({}: {})", self.id, self.url)
152    }
153}
154
155/// Options for page navigation
156#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct NavigateOptions {
158    /// Maximum time to wait for page load (milliseconds)
159    /// Default: 30000 (30 seconds)
160    pub timeout_ms: u64,
161
162    /// Wait until event
163    /// Possible values: "load", "domcontentloaded", "networkidle"
164    /// Default: "load"
165    pub wait_until: NavigateWaitEvent,
166
167    /// JavaScript to execute after page load
168    pub inject_js: Option<String>,
169
170    /// Headers to send with navigation request
171    pub headers: HashMap<String, String>,
172
173    /// User agent override
174    pub user_agent: Option<String>,
175
176    /// Viewport dimensions (width, height)
177    pub viewport: Option<(u32, u32)>,
178
179    /// Follow redirects (default: true)
180    pub follow_redirects: bool,
181}
182
183impl Default for NavigateOptions {
184    fn default() -> Self {
185        Self {
186            timeout_ms: 30000,
187            wait_until: NavigateWaitEvent::Load,
188            inject_js: None,
189            headers: HashMap::new(),
190            user_agent: None,
191            viewport: None,
192            follow_redirects: true,
193        }
194    }
195}
196
197/// Page load wait event
198#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
199#[serde(rename_all = "lowercase")]
200pub enum NavigateWaitEvent {
201    /// Wait for page load event
202    #[default]
203    Load,
204    /// Wait for DOM content loaded event
205    DomContentLoaded,
206    /// Wait for network to idle (no pending requests)
207    NetworkIdle,
208}
209
210impl fmt::Display for NavigateWaitEvent {
211    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
212        match self {
213            Self::Load => write!(f, "load"),
214            Self::DomContentLoaded => write!(f, "domcontentloaded"),
215            Self::NetworkIdle => write!(f, "networkidle"),
216        }
217    }
218}
219
220/// Extracted content from a page
221#[derive(Debug, Clone, Default, Serialize, Deserialize)]
222pub struct ExtractedContent {
223    /// Main body text
224    pub text: String,
225
226    /// Extracted HTML (optional, if structured extraction requested)
227    pub html: Option<String>,
228
229    /// Extracted links
230    pub links: Vec<ExtractedLink>,
231
232    /// Extracted images
233    pub images: Vec<ExtractedImage>,
234
235    /// Extracted metadata
236    pub metadata: ContentMetadata,
237
238    /// Structured data (JSON-LD, microdata, etc.)
239    pub structured_data: Option<serde_json::Value>,
240
241    /// Language detection
242    pub language: Option<String>,
243
244    /// Extraction confidence (0.0-1.0)
245    pub confidence: f32,
246}
247
248/// Extracted link from content
249#[derive(Debug, Clone, Default, Serialize, Deserialize)]
250pub struct ExtractedLink {
251    /// Link text
252    pub text: String,
253    /// Link URL
254    pub href: String,
255    /// Link title attribute
256    pub title: Option<String>,
257}
258
259/// Extracted image from content
260#[derive(Debug, Clone, Default, Serialize, Deserialize)]
261pub struct ExtractedImage {
262    /// Image URL
263    pub src: String,
264    /// Alt text
265    pub alt: Option<String>,
266    /// Image title
267    pub title: Option<String>,
268}
269
270/// Content metadata
271#[derive(Debug, Clone, Default, Serialize, Deserialize)]
272pub struct ContentMetadata {
273    /// Page title
274    pub title: Option<String>,
275    /// Page description
276    pub description: Option<String>,
277    /// Open Graph image
278    pub og_image: Option<String>,
279    /// Open Graph title
280    pub og_title: Option<String>,
281    /// Content type (text/html, application/json, etc.)
282    pub content_type: Option<String>,
283    /// Character encoding
284    pub charset: Option<String>,
285    /// Author
286    pub author: Option<String>,
287    /// Publication date
288    pub publish_date: Option<String>,
289    /// Custom meta tags
290    pub custom_meta: HashMap<String, String>,
291}
292
293/// Options for content extraction
294#[derive(Debug, Clone, Serialize, Deserialize)]
295pub struct ExtractOptions {
296    /// CSS selector for main content area (optional, auto-detect if not specified)
297    pub content_selector: Option<String>,
298
299    /// Extract links (default: true)
300    pub extract_links: bool,
301
302    /// Extract images (default: false)
303    pub extract_images: bool,
304
305    /// Extract structured data (default: false)
306    pub extract_structured_data: bool,
307
308    /// Remove script and style tags (default: true)
309    pub remove_scripts: bool,
310
311    /// Minimum text length to include (default: 20 chars)
312    pub min_text_length: usize,
313
314    /// Detect language (default: false)
315    pub detect_language: bool,
316
317    /// Custom JavaScript to execute for extraction
318    pub custom_js: Option<String>,
319}
320
321impl Default for ExtractOptions {
322    fn default() -> Self {
323        Self {
324            content_selector: None,
325            extract_links: true,
326            extract_images: false,
327            extract_structured_data: false,
328            remove_scripts: true,
329            min_text_length: 20,
330            detect_language: false,
331            custom_js: None,
332        }
333    }
334}
335
336/// Screenshot/capture format
337#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize, Hash)]
338#[serde(rename_all = "lowercase")]
339pub enum CaptureFormat {
340    /// PNG image (lossless, recommended)
341    #[default]
342    Png,
343    /// JPEG image (compressed, smaller file size)
344    Jpeg,
345    /// PDF document
346    Pdf,
347    /// MHTML archive (page + resources)
348    Mhtml,
349    /// Full HTML source
350    Html,
351    /// WebP image (modern, good compression)
352    Webp,
353}
354
355impl fmt::Display for CaptureFormat {
356    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
357        match self {
358            Self::Png => write!(f, "png"),
359            Self::Jpeg => write!(f, "jpeg"),
360            Self::Pdf => write!(f, "pdf"),
361            Self::Mhtml => write!(f, "mhtml"),
362            Self::Html => write!(f, "html"),
363            Self::Webp => write!(f, "webp"),
364        }
365    }
366}
367
368/// Captured page content/screenshot
369#[derive(Debug, Clone, Default, Serialize, Deserialize)]
370pub struct CapturedPage {
371    /// Format of captured content
372    pub format: CaptureFormat,
373
374    /// Raw captured data (PNG bytes, PDF bytes, HTML string, etc.)
375    pub data: Vec<u8>,
376
377    /// MIME type
378    pub mime_type: String,
379
380    /// File size in bytes
381    pub size_bytes: usize,
382
383    /// Capture metadata
384    pub metadata: CaptureMetadata,
385}
386
387impl CapturedPage {
388    /// Get captured data as string (for text formats)
389    pub fn as_string(&self) -> WebAdapterResult<String> {
390        String::from_utf8(self.data.clone())
391            .map_err(|e| WebAdapterError::Serialization(e.to_string()))
392    }
393}
394
395/// Capture metadata
396#[derive(Debug, Clone, Default, Serialize, Deserialize)]
397pub struct CaptureMetadata {
398    /// Page URL that was captured
399    pub url: String,
400
401    /// Page title at time of capture
402    pub title: Option<String>,
403
404    /// Viewport width used for capture
405    pub viewport_width: u32,
406
407    /// Viewport height used for capture
408    pub viewport_height: u32,
409
410    /// Whether full page was captured (vs viewport)
411    pub full_page: bool,
412
413    /// Device scale factor (1.0 for normal, 2.0 for retina, etc.)
414    pub device_scale_factor: f32,
415}
416
417/// Options for page capture/screenshot
418#[derive(Debug, Clone, Serialize, Deserialize)]
419pub struct CaptureOptions {
420    /// Capture format (default: PNG)
421    pub format: CaptureFormat,
422
423    /// Capture full page or just viewport (default: true for PNG/JPEG, false for PDF)
424    pub full_page: bool,
425
426    /// Timeout for capture (milliseconds, default: 10000)
427    pub timeout_ms: u64,
428
429    /// Quality for JPEG/WebP (0-100, default: 80)
430    pub quality: Option<u8>,
431
432    /// Omit background (PNG only, default: false)
433    pub omit_background: bool,
434
435    /// Device scale factor (default: 1.0)
436    pub device_scale_factor: Option<f32>,
437
438    /// Wait delay before capture (milliseconds, default: 0)
439    pub delay_ms: u64,
440
441    /// JavaScript to execute before capture
442    pub execute_js: Option<String>,
443}
444
445impl Default for CaptureOptions {
446    fn default() -> Self {
447        Self {
448            format: CaptureFormat::Png,
449            full_page: true,
450            timeout_ms: 10000,
451            quality: Some(80),
452            omit_background: false,
453            device_scale_factor: None,
454            delay_ms: 0,
455            execute_js: None,
456        }
457    }
458}
459
460impl CaptureOptions {
461    /// Set capture format
462    pub fn format(mut self, format: CaptureFormat) -> Self {
463        self.format = format;
464        self
465    }
466
467    /// Set full page capture
468    pub fn full_page(mut self, full: bool) -> Self {
469        self.full_page = full;
470        self
471    }
472
473    /// Set quality (for JPEG/WebP)
474    pub fn quality(mut self, quality: u8) -> Self {
475        self.quality = Some(quality.min(100));
476        self
477    }
478
479    /// Set timeout
480    pub fn timeout_ms(mut self, timeout: u64) -> Self {
481        self.timeout_ms = timeout;
482        self
483    }
484}
485
486// =============================================================================
487// TRAIT DEFINITION
488// =============================================================================
489
490/// Web browser adapter trait for reasonkit-core
491///
492/// Provides abstraction for browser automation and content extraction.
493/// Implementations can use MCP servers, HTTP binding, FFI, or other mechanisms
494/// to communicate with reasonkit-web.
495///
496/// # Implementing the Trait
497///
498/// - **McpWebAdapter**: Uses MCP stdio server (local or remote)
499/// - **HttpWebAdapter**: Uses HTTP JSON-RPC binding
500/// - **LocalWebAdapter**: Direct FFI binding for same-process usage
501///
502/// # Connection Lifecycle
503///
504/// 1. Create adapter with configuration
505/// 2. Call `connect()` to establish connection
506/// 3. Use navigation/extraction/capture methods
507/// 4. Call `disconnect()` when done
508///
509/// Implementations MUST handle reconnection automatically on transient failures.
510#[async_trait]
511pub trait WebBrowserAdapter: Send + Sync {
512    // -------------------------------------------------------------------------
513    // LIFECYCLE
514    // -------------------------------------------------------------------------
515
516    /// Initialize and connect to the web browser service
517    ///
518    /// # Errors
519    ///
520    /// Returns `WebAdapterError::NotConnected` if service is unavailable.
521    ///
522    /// # Implementation Notes
523    ///
524    /// - May start a browser process (headless Chrome, etc.)
525    /// - May connect to an existing MCP server
526    /// - May verify API compatibility
527    /// - Should implement connection pooling if needed
528    async fn connect(&mut self) -> WebAdapterResult<()>;
529
530    /// Disconnect from web service and cleanup resources
531    ///
532    /// # Implementation Notes
533    ///
534    /// - Should close browser processes gracefully
535    /// - Should save cache/session state if applicable
536    /// - Idempotent (safe to call multiple times)
537    async fn disconnect(&mut self) -> WebAdapterResult<()>;
538
539    /// Check if adapter is currently connected
540    fn is_connected(&self) -> bool;
541
542    // -------------------------------------------------------------------------
543    // NAVIGATION
544    // -------------------------------------------------------------------------
545
546    /// Navigate to a URL and return a page handle
547    ///
548    /// # Arguments
549    ///
550    /// * `url` - Target URL to navigate to
551    /// * `options` - Navigation options (timeout, wait event, etc.)
552    ///
553    /// # Errors
554    ///
555    /// Returns:
556    /// - `WebAdapterError::InvalidUrl` if URL is malformed
557    /// - `WebAdapterError::NavigationTimeout` if timeout exceeded
558    /// - `WebAdapterError::NavigationFailed` for other failures (network, SSL, 404, etc.)
559    ///
560    /// # Implementation Notes
561    ///
562    /// - MUST validate URL before navigation
563    /// - MUST respect timeout_ms in options
564    /// - MUST wait for specified event (load, domcontentloaded, etc.)
565    /// - SHOULD respect custom headers and user agent if provided
566    /// - SHOULD inject JavaScript after load if provided
567    /// - SHOULD handle redirects according to options
568    /// - MUST return page handle with unique ID and current URL
569    async fn navigate(
570        &mut self,
571        url: &str,
572        options: NavigateOptions,
573    ) -> WebAdapterResult<PageHandle>;
574
575    /// Go back in browser history
576    ///
577    /// # Errors
578    ///
579    /// Returns `WebAdapterError::NavigationFailed` if unable to go back.
580    async fn go_back(&mut self) -> WebAdapterResult<PageHandle>;
581
582    /// Go forward in browser history
583    ///
584    /// # Errors
585    ///
586    /// Returns `WebAdapterError::NavigationFailed` if unable to go forward.
587    async fn go_forward(&mut self) -> WebAdapterResult<PageHandle>;
588
589    /// Reload current page
590    ///
591    /// # Errors
592    ///
593    /// Returns `WebAdapterError::NavigationFailed` if unable to reload.
594    async fn reload(&mut self) -> WebAdapterResult<PageHandle>;
595
596    // -------------------------------------------------------------------------
597    // CONTENT EXTRACTION
598    // -------------------------------------------------------------------------
599
600    /// Extract content from a page
601    ///
602    /// # Arguments
603    ///
604    /// * `page` - Page handle to extract from
605    /// * `options` - Extraction options
606    ///
607    /// # Errors
608    ///
609    /// Returns:
610    /// - `WebAdapterError::ExtractionFailed` if extraction fails
611    /// - `WebAdapterError::InvalidSelector` if custom selector is invalid
612    /// - `WebAdapterError::JavaScriptError` if custom JS fails
613    ///
614    /// # Implementation Notes
615    ///
616    /// - MUST extract main content text
617    /// - SHOULD auto-detect main content area if no selector provided
618    /// - SHOULD extract links and images according to options
619    /// - SHOULD execute custom JavaScript if provided
620    /// - SHOULD detect language if requested
621    /// - MUST include confidence score (0.0-1.0)
622    /// - SHOULD normalize whitespace
623    /// - SHOULD extract metadata (title, description, og tags)
624    async fn extract_content(
625        &mut self,
626        page: &PageHandle,
627        options: ExtractOptions,
628    ) -> WebAdapterResult<ExtractedContent>;
629
630    /// Execute custom JavaScript on page
631    ///
632    /// # Arguments
633    ///
634    /// * `page` - Page to execute on
635    /// * `script` - JavaScript code to execute
636    ///
637    /// # Returns
638    ///
639    /// Serialized result of JavaScript execution as JSON value
640    ///
641    /// # Errors
642    ///
643    /// Returns `WebAdapterError::JavaScriptError` if execution fails.
644    ///
645    /// # Implementation Notes
646    ///
647    /// - MUST timeout execution if it takes too long (>30s)
648    /// - MUST return JSON-serializable result
649    /// - SHOULD return last expression value
650    async fn execute_js(
651        &mut self,
652        page: &PageHandle,
653        script: &str,
654    ) -> WebAdapterResult<serde_json::Value>;
655
656    /// Get text content using CSS selector
657    ///
658    /// # Arguments
659    ///
660    /// * `page` - Page to query
661    /// * `selector` - CSS selector
662    ///
663    /// # Errors
664    ///
665    /// Returns:
666    /// - `WebAdapterError::InvalidSelector` if selector is invalid
667    /// - `WebAdapterError::ExtractionFailed` if element not found
668    async fn get_text(&mut self, page: &PageHandle, selector: &str) -> WebAdapterResult<String>;
669
670    // -------------------------------------------------------------------------
671    // SCREENSHOTS & CAPTURE
672    // -------------------------------------------------------------------------
673
674    /// Capture page as screenshot or document
675    ///
676    /// # Arguments
677    ///
678    /// * `page` - Page to capture
679    /// * `options` - Capture options (format, quality, etc.)
680    ///
681    /// # Errors
682    ///
683    /// Returns:
684    /// - `WebAdapterError::CaptureFailed` if capture fails
685    /// - `WebAdapterError::UnsupportedFormat` if format not supported
686    ///
687    /// # Implementation Notes
688    ///
689    /// - MUST support PNG, JPEG, PDF formats at minimum
690    /// - MAY support MHTML, WebP if available
691    /// - MUST respect quality setting for JPEG/WebP
692    /// - MUST capture full page or viewport according to options
693    /// - SHOULD wait for delay_ms before capturing
694    /// - SHOULD execute custom JavaScript before capture if provided
695    /// - MUST include capture metadata (viewport size, scale factor, etc.)
696    async fn capture_screenshot(
697        &mut self,
698        page: &PageHandle,
699        options: CaptureOptions,
700    ) -> WebAdapterResult<CapturedPage>;
701
702    // -------------------------------------------------------------------------
703    // DIAGNOSTICS
704    // -------------------------------------------------------------------------
705
706    /// Get connection status and diagnostics
707    ///
708    /// # Returns
709    ///
710    /// JSON object with connection info, statistics, etc.
711    fn diagnostics(&self) -> serde_json::Value;
712
713    /// Get adapter name (for logging/debugging)
714    fn name(&self) -> &str;
715
716    /// Get adapter version
717    fn version(&self) -> &str;
718}
719
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an HTML capture around `bytes` with fixed, arbitrary metadata.
    fn html_capture(bytes: &[u8]) -> CapturedPage {
        CapturedPage {
            format: CaptureFormat::Html,
            data: bytes.to_vec(),
            mime_type: "text/html".to_string(),
            size_bytes: bytes.len(),
            metadata: CaptureMetadata {
                url: "https://example.com".to_string(),
                title: None,
                viewport_width: 1024,
                viewport_height: 768,
                full_page: false,
                device_scale_factor: 1.0,
            },
        }
    }

    #[test]
    fn test_navigate_options_default() {
        let opts = NavigateOptions::default();
        assert_eq!(opts.timeout_ms, 30000);
        assert_eq!(opts.wait_until, NavigateWaitEvent::Load);
        assert!(opts.follow_redirects);
        assert!(opts.headers.is_empty());
        assert!(opts.inject_js.is_none());
        assert!(opts.user_agent.is_none());
        assert!(opts.viewport.is_none());
    }

    #[test]
    fn test_capture_options_default() {
        let opts = CaptureOptions::default();
        assert_eq!(opts.format, CaptureFormat::Png);
        assert!(opts.full_page);
        assert_eq!(opts.timeout_ms, 10000);
        assert_eq!(opts.quality, Some(80));
        assert!(!opts.omit_background);
        assert!(opts.device_scale_factor.is_none());
        assert_eq!(opts.delay_ms, 0);
        assert!(opts.execute_js.is_none());
    }

    #[test]
    fn test_capture_options_builder() {
        let opts = CaptureOptions::default()
            .format(CaptureFormat::Jpeg)
            .quality(90)
            .full_page(false)
            .timeout_ms(2500);

        assert_eq!(opts.format, CaptureFormat::Jpeg);
        assert_eq!(opts.quality, Some(90));
        assert!(!opts.full_page);
        assert_eq!(opts.timeout_ms, 2500);
    }

    #[test]
    fn test_capture_format_display() {
        // Every variant, so a newly added format cannot go unchecked silently.
        let expected = [
            (CaptureFormat::Png, "png"),
            (CaptureFormat::Jpeg, "jpeg"),
            (CaptureFormat::Pdf, "pdf"),
            (CaptureFormat::Mhtml, "mhtml"),
            (CaptureFormat::Html, "html"),
            (CaptureFormat::Webp, "webp"),
        ];
        for (format, name) in expected {
            assert_eq!(format.to_string(), name);
        }
    }

    #[test]
    fn test_page_handle_display() {
        let page = PageHandle {
            id: "page-1".to_string(),
            url: "https://example.com".to_string(),
            title: "Example".to_string(),
            is_active: true,
        };

        assert_eq!(page.to_string(), "Page(page-1: https://example.com)");
    }

    #[test]
    fn test_extract_options_default() {
        let opts = ExtractOptions::default();
        assert!(opts.extract_links);
        assert!(!opts.extract_images);
        assert!(!opts.extract_structured_data);
        assert!(opts.remove_scripts);
        assert_eq!(opts.min_text_length, 20);
        assert!(!opts.detect_language);
        assert!(opts.content_selector.is_none());
        assert!(opts.custom_js.is_none());
    }

    #[test]
    fn test_content_metadata_default() {
        let meta = ContentMetadata::default();
        assert!(meta.title.is_none());
        assert!(meta.custom_meta.is_empty());
    }

    #[test]
    fn test_navigate_wait_event_display() {
        assert_eq!(NavigateWaitEvent::Load.to_string(), "load");
        assert_eq!(
            NavigateWaitEvent::DomContentLoaded.to_string(),
            "domcontentloaded"
        );
        assert_eq!(NavigateWaitEvent::NetworkIdle.to_string(), "networkidle");
    }

    #[test]
    fn test_serialized_names_match_display() {
        // The serde wire name and the `Display` text are defined separately;
        // this pins them to each other.
        for event in [
            NavigateWaitEvent::Load,
            NavigateWaitEvent::DomContentLoaded,
            NavigateWaitEvent::NetworkIdle,
        ] {
            let json = serde_json::to_string(&event).unwrap();
            assert_eq!(json, format!("\"{event}\""));
        }

        for format in [
            CaptureFormat::Png,
            CaptureFormat::Jpeg,
            CaptureFormat::Pdf,
            CaptureFormat::Mhtml,
            CaptureFormat::Html,
            CaptureFormat::Webp,
        ] {
            let json = serde_json::to_string(&format).unwrap();
            assert_eq!(json, format!("\"{format}\""));
        }
    }

    #[test]
    fn test_quality_clamping() {
        let opts = CaptureOptions::default().quality(150);
        assert_eq!(opts.quality, Some(100));

        // Boundary values pass through untouched.
        assert_eq!(CaptureOptions::default().quality(100).quality, Some(100));
        assert_eq!(CaptureOptions::default().quality(0).quality, Some(0));
    }

    #[test]
    fn test_capture_page_as_string() {
        let page = html_capture("<html>test</html>".as_bytes());

        assert_eq!(page.size_bytes, 17);
        assert!(page.as_string().is_ok());
        assert_eq!(page.as_string().unwrap(), "<html>test</html>");
    }

    #[test]
    fn test_capture_page_as_string_empty() {
        assert_eq!(html_capture(b"").as_string().unwrap(), "");
    }

    #[test]
    fn test_capture_page_as_string_invalid_utf8() {
        // 0xff can never appear in well-formed UTF-8.
        let result = html_capture(&[b'a', 0xff, b'b']).as_string();
        assert!(matches!(result, Err(WebAdapterError::Serialization(_))));
    }

    #[test]
    fn test_error_messages() {
        assert_eq!(
            WebAdapterError::NavigationTimeout(5000).to_string(),
            "Navigation timeout after 5000ms"
        );
        assert_eq!(
            WebAdapterError::CaptureFailed {
                format: CaptureFormat::Pdf,
                message: "boom".to_string(),
            }
            .to_string(),
            "Capture failed: Pdf - boom"
        );
        assert_eq!(
            WebAdapterError::UnsupportedFormat(CaptureFormat::Webp).to_string(),
            "Capture format not supported: Webp"
        );
        assert_eq!(
            WebAdapterError::Generic("plain".to_string()).to_string(),
            "plain"
        );
    }
}