// reasonkit/traits/web.rs

//! Web browser adapter trait for core <-> web integration.
//!
//! This trait defines the contract between `reasonkit-core` and `reasonkit-web`.
//! Implementations live in `reasonkit-web`; consumers live in `reasonkit-core`.
5
6use async_trait::async_trait;
7use serde::{Deserialize, Serialize};
8use serde_json::Value;
9use std::time::Duration;
10use thiserror::Error;
11
12/// Result type for web adapter operations.
13pub type WebAdapterResult<T> = Result<T, WebAdapterError>;
14
15/// Errors that can occur during web operations.
16#[derive(Error, Debug)]
17pub enum WebAdapterError {
18    #[error("Connection failed: {0}")]
19    Connection(String),
20
21    #[error("Navigation failed: {0}")]
22    Navigation(String),
23
24    #[error("Extraction failed: {0}")]
25    Extraction(String),
26
27    #[error("Timeout: {0}")]
28    Timeout(String),
29
30    #[error("Element not found: {0}")]
31    ElementNotFound(String),
32
33    #[error("JavaScript error: {0}")]
34    JavaScript(String),
35
36    #[error("Screenshot failed: {0}")]
37    Screenshot(String),
38
39    #[error("Not connected")]
40    NotConnected,
41
42    #[error("IO error: {0}")]
43    Io(#[from] std::io::Error),
44}
45
46/// Handle to a loaded page.
47#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct PageHandle {
49    pub id: String,
50    pub url: String,
51    pub title: Option<String>,
52    pub status_code: u16,
53    pub load_time_ms: u64,
54}
55
56/// Options for navigation.
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct NavigateOptions {
59    pub wait_until: WaitUntil,
60    pub timeout: Duration,
61    pub user_agent: Option<String>,
62    pub headers: Vec<(String, String)>,
63    pub viewport: Option<Viewport>,
64}
65
66impl Default for NavigateOptions {
67    fn default() -> Self {
68        Self {
69            wait_until: WaitUntil::NetworkIdle,
70            timeout: Duration::from_secs(30),
71            user_agent: None,
72            headers: Vec::new(),
73            viewport: None,
74        }
75    }
76}
77
78/// When to consider navigation complete.
79#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
80pub enum WaitUntil {
81    Load,
82    DOMContentLoaded,
83    NetworkIdle,
84    NetworkAlmostIdle,
85}
86
87/// Viewport dimensions.
88#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
89pub struct Viewport {
90    pub width: u32,
91    pub height: u32,
92    pub device_scale_factor: f64,
93    pub is_mobile: bool,
94}
95
96impl Default for Viewport {
97    fn default() -> Self {
98        Self {
99            width: 1920,
100            height: 1080,
101            device_scale_factor: 1.0,
102            is_mobile: false,
103        }
104    }
105}
106
107/// Options for content extraction.
108#[derive(Debug, Clone, Serialize, Deserialize)]
109pub struct ExtractOptions {
110    pub format: ExtractFormat,
111    pub include_metadata: bool,
112    pub clean_html: bool,
113    pub include_links: bool,
114    pub include_images: bool,
115    pub max_length: Option<usize>,
116}
117
118impl Default for ExtractOptions {
119    fn default() -> Self {
120        Self {
121            format: ExtractFormat::Markdown,
122            include_metadata: true,
123            clean_html: true,
124            include_links: true,
125            include_images: false,
126            max_length: None,
127        }
128    }
129}
130
131/// Format for extracted content.
132#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
133pub enum ExtractFormat {
134    PlainText,
135    Markdown,
136    Html,
137    Json,
138}
139
140/// Extracted content from a page.
141#[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct ExtractedContent {
143    pub text: String,
144    pub format: ExtractFormat,
145    pub title: Option<String>,
146    pub description: Option<String>,
147    pub author: Option<String>,
148    pub published_date: Option<String>,
149    pub word_count: usize,
150    pub links: Vec<Link>,
151    pub images: Vec<Image>,
152    pub metadata: Value,
153}
154
155/// A hyperlink from extracted content.
156#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct Link {
158    pub text: String,
159    pub href: String,
160    pub rel: Option<String>,
161}
162
163/// An image from extracted content.
164#[derive(Debug, Clone, Serialize, Deserialize)]
165pub struct Image {
166    pub src: String,
167    pub alt: Option<String>,
168    pub width: Option<u32>,
169    pub height: Option<u32>,
170}
171
172/// Options for screenshot capture.
173#[derive(Debug, Clone, Serialize, Deserialize)]
174pub struct CaptureOptions {
175    pub format: CaptureFormat,
176    pub quality: u8,
177    pub full_page: bool,
178    pub clip: Option<ClipRect>,
179}
180
181impl Default for CaptureOptions {
182    fn default() -> Self {
183        Self {
184            format: CaptureFormat::Png,
185            quality: 90,
186            full_page: true,
187            clip: None,
188        }
189    }
190}
191
192/// Format for captured images.
193#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
194pub enum CaptureFormat {
195    Png,
196    Jpeg,
197    Webp,
198}
199
200/// Rectangle for clipping screenshots.
201#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
202pub struct ClipRect {
203    pub x: f64,
204    pub y: f64,
205    pub width: f64,
206    pub height: f64,
207}
208
209/// A captured page (screenshot or PDF).
210#[derive(Debug, Clone, Serialize, Deserialize)]
211pub struct CapturedPage {
212    pub handle: PageHandle,
213    pub format: CaptureFormat,
214    pub data: Vec<u8>,
215    pub width: u32,
216    pub height: u32,
217}
218
219/// Core abstraction for web browser operations.
220///
221/// This trait is implemented by `reasonkit-web` and consumed by `reasonkit-core`.
222/// It provides a unified interface for web browsing, content extraction, and capture.
223///
224/// # Example
225///
226/// ```ignore
227/// use reasonkit::traits::{WebBrowserAdapter, NavigateOptions, ExtractOptions};
228///
229/// async fn example(browser: &mut impl WebBrowserAdapter) -> WebAdapterResult<()> {
230///     browser.connect().await?;
231///
232///     let page = browser.navigate("https://example.com", NavigateOptions::default()).await?;
233///     let content = browser.extract_content(&page, ExtractOptions::default()).await?;
234///
235///     println!("Title: {:?}", content.title);
236///     println!("Word count: {}", content.word_count);
237///
238///     browser.disconnect().await?;
239///     Ok(())
240/// }
241/// ```
242#[async_trait]
243pub trait WebBrowserAdapter: Send + Sync {
244    // ─────────────────────────────────────────────────────────────────────────
245    // Lifecycle
246    // ─────────────────────────────────────────────────────────────────────────
247
248    /// Connect to the browser instance.
249    async fn connect(&mut self) -> WebAdapterResult<()>;
250
251    /// Disconnect from the browser instance.
252    async fn disconnect(&mut self) -> WebAdapterResult<()>;
253
254    /// Check if currently connected.
255    fn is_connected(&self) -> bool;
256
257    // ─────────────────────────────────────────────────────────────────────────
258    // Navigation
259    // ─────────────────────────────────────────────────────────────────────────
260
261    /// Navigate to a URL and return a handle to the loaded page.
262    async fn navigate(&self, url: &str, options: NavigateOptions) -> WebAdapterResult<PageHandle>;
263
264    /// Wait for the page to finish loading.
265    async fn wait_for_load(&self, handle: &PageHandle, timeout: Duration) -> WebAdapterResult<()>;
266
267    /// Go back in browser history.
268    async fn go_back(&self, handle: &PageHandle) -> WebAdapterResult<()>;
269
270    /// Go forward in browser history.
271    async fn go_forward(&self, handle: &PageHandle) -> WebAdapterResult<()>;
272
273    /// Reload the current page.
274    async fn reload(&self, handle: &PageHandle) -> WebAdapterResult<()>;
275
276    // ─────────────────────────────────────────────────────────────────────────
277    // Content Extraction
278    // ─────────────────────────────────────────────────────────────────────────
279
280    /// Extract content from the page in the specified format.
281    async fn extract_content(
282        &self,
283        handle: &PageHandle,
284        options: ExtractOptions,
285    ) -> WebAdapterResult<ExtractedContent>;
286
287    /// Extract all links from the page.
288    async fn extract_links(&self, handle: &PageHandle) -> WebAdapterResult<Vec<Link>>;
289
290    /// Extract structured data using a CSS selector.
291    async fn extract_structured(
292        &self,
293        handle: &PageHandle,
294        selector: &str,
295    ) -> WebAdapterResult<Value>;
296
297    /// Get the raw HTML of the page.
298    async fn get_html(&self, handle: &PageHandle) -> WebAdapterResult<String>;
299
300    // ─────────────────────────────────────────────────────────────────────────
301    // Capture
302    // ─────────────────────────────────────────────────────────────────────────
303
304    /// Capture a screenshot of the page.
305    async fn capture_screenshot(
306        &self,
307        handle: &PageHandle,
308        options: CaptureOptions,
309    ) -> WebAdapterResult<CapturedPage>;
310
311    /// Capture the page as a PDF.
312    async fn capture_pdf(&self, handle: &PageHandle) -> WebAdapterResult<Vec<u8>>;
313
314    // ─────────────────────────────────────────────────────────────────────────
315    // Interaction
316    // ─────────────────────────────────────────────────────────────────────────
317
318    /// Click an element matching the selector.
319    async fn click(&self, handle: &PageHandle, selector: &str) -> WebAdapterResult<()>;
320
321    /// Type text into an element matching the selector.
322    async fn type_text(
323        &self,
324        handle: &PageHandle,
325        selector: &str,
326        text: &str,
327    ) -> WebAdapterResult<()>;
328
329    /// Select an option from a dropdown.
330    async fn select_option(
331        &self,
332        handle: &PageHandle,
333        selector: &str,
334        value: &str,
335    ) -> WebAdapterResult<()>;
336
337    /// Scroll the page.
338    async fn scroll(&self, handle: &PageHandle, x: f64, y: f64) -> WebAdapterResult<()>;
339
340    /// Wait for an element to appear.
341    async fn wait_for_selector(
342        &self,
343        handle: &PageHandle,
344        selector: &str,
345        timeout: Duration,
346    ) -> WebAdapterResult<()>;
347
348    // ─────────────────────────────────────────────────────────────────────────
349    // JavaScript
350    // ─────────────────────────────────────────────────────────────────────────
351
352    /// Evaluate JavaScript and return the result.
353    async fn evaluate_js(&self, handle: &PageHandle, script: &str) -> WebAdapterResult<Value>;
354
355    /// Inject a script into the page.
356    async fn inject_script(&self, handle: &PageHandle, script: &str) -> WebAdapterResult<()>;
357
358    // ─────────────────────────────────────────────────────────────────────────
359    // Cookies & Storage
360    // ─────────────────────────────────────────────────────────────────────────
361
362    /// Get all cookies for the current page.
363    async fn get_cookies(&self, handle: &PageHandle) -> WebAdapterResult<Vec<Cookie>>;
364
365    /// Set a cookie.
366    async fn set_cookie(&self, handle: &PageHandle, cookie: Cookie) -> WebAdapterResult<()>;
367
368    /// Clear all cookies.
369    async fn clear_cookies(&self, handle: &PageHandle) -> WebAdapterResult<()>;
370
371    /// Get local storage value.
372    async fn get_local_storage(
373        &self,
374        handle: &PageHandle,
375        key: &str,
376    ) -> WebAdapterResult<Option<String>>;
377
378    /// Set local storage value.
379    async fn set_local_storage(
380        &self,
381        handle: &PageHandle,
382        key: &str,
383        value: &str,
384    ) -> WebAdapterResult<()>;
385}
386
387/// A browser cookie.
388#[derive(Debug, Clone, Serialize, Deserialize)]
389pub struct Cookie {
390    pub name: String,
391    pub value: String,
392    pub domain: Option<String>,
393    pub path: Option<String>,
394    pub expires: Option<i64>,
395    pub http_only: bool,
396    pub secure: bool,
397    pub same_site: Option<String>,
398}