use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::time::Duration;
use thiserror::Error;

12pub type WebAdapterResult<T> = Result<T, WebAdapterError>;
14
15#[derive(Error, Debug)]
17pub enum WebAdapterError {
18 #[error("Connection failed: {0}")]
19 Connection(String),
20
21 #[error("Navigation failed: {0}")]
22 Navigation(String),
23
24 #[error("Extraction failed: {0}")]
25 Extraction(String),
26
27 #[error("Timeout: {0}")]
28 Timeout(String),
29
30 #[error("Element not found: {0}")]
31 ElementNotFound(String),
32
33 #[error("JavaScript error: {0}")]
34 JavaScript(String),
35
36 #[error("Screenshot failed: {0}")]
37 Screenshot(String),
38
39 #[error("Not connected")]
40 NotConnected,
41
42 #[error("IO error: {0}")]
43 Io(#[from] std::io::Error),
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct PageHandle {
49 pub id: String,
50 pub url: String,
51 pub title: Option<String>,
52 pub status_code: u16,
53 pub load_time_ms: u64,
54}
55
56#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct NavigateOptions {
59 pub wait_until: WaitUntil,
60 pub timeout: Duration,
61 pub user_agent: Option<String>,
62 pub headers: Vec<(String, String)>,
63 pub viewport: Option<Viewport>,
64}
65
66impl Default for NavigateOptions {
67 fn default() -> Self {
68 Self {
69 wait_until: WaitUntil::NetworkIdle,
70 timeout: Duration::from_secs(30),
71 user_agent: None,
72 headers: Vec::new(),
73 viewport: None,
74 }
75 }
76}
77
78#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
80pub enum WaitUntil {
81 Load,
82 DOMContentLoaded,
83 NetworkIdle,
84 NetworkAlmostIdle,
85}
86
87#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
89pub struct Viewport {
90 pub width: u32,
91 pub height: u32,
92 pub device_scale_factor: f64,
93 pub is_mobile: bool,
94}
95
96impl Default for Viewport {
97 fn default() -> Self {
98 Self {
99 width: 1920,
100 height: 1080,
101 device_scale_factor: 1.0,
102 is_mobile: false,
103 }
104 }
105}
106
107#[derive(Debug, Clone, Serialize, Deserialize)]
109pub struct ExtractOptions {
110 pub format: ExtractFormat,
111 pub include_metadata: bool,
112 pub clean_html: bool,
113 pub include_links: bool,
114 pub include_images: bool,
115 pub max_length: Option<usize>,
116}
117
118impl Default for ExtractOptions {
119 fn default() -> Self {
120 Self {
121 format: ExtractFormat::Markdown,
122 include_metadata: true,
123 clean_html: true,
124 include_links: true,
125 include_images: false,
126 max_length: None,
127 }
128 }
129}
130
131#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
133pub enum ExtractFormat {
134 PlainText,
135 Markdown,
136 Html,
137 Json,
138}
139
140#[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct ExtractedContent {
143 pub text: String,
144 pub format: ExtractFormat,
145 pub title: Option<String>,
146 pub description: Option<String>,
147 pub author: Option<String>,
148 pub published_date: Option<String>,
149 pub word_count: usize,
150 pub links: Vec<Link>,
151 pub images: Vec<Image>,
152 pub metadata: Value,
153}
154
155#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct Link {
158 pub text: String,
159 pub href: String,
160 pub rel: Option<String>,
161}
162
163#[derive(Debug, Clone, Serialize, Deserialize)]
165pub struct Image {
166 pub src: String,
167 pub alt: Option<String>,
168 pub width: Option<u32>,
169 pub height: Option<u32>,
170}
171
172#[derive(Debug, Clone, Serialize, Deserialize)]
174pub struct CaptureOptions {
175 pub format: CaptureFormat,
176 pub quality: u8,
177 pub full_page: bool,
178 pub clip: Option<ClipRect>,
179}
180
181impl Default for CaptureOptions {
182 fn default() -> Self {
183 Self {
184 format: CaptureFormat::Png,
185 quality: 90,
186 full_page: true,
187 clip: None,
188 }
189 }
190}
191
192#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
194pub enum CaptureFormat {
195 Png,
196 Jpeg,
197 Webp,
198}
199
200#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
202pub struct ClipRect {
203 pub x: f64,
204 pub y: f64,
205 pub width: f64,
206 pub height: f64,
207}
208
209#[derive(Debug, Clone, Serialize, Deserialize)]
211pub struct CapturedPage {
212 pub handle: PageHandle,
213 pub format: CaptureFormat,
214 pub data: Vec<u8>,
215 pub width: u32,
216 pub height: u32,
217}
218
219#[async_trait]
243pub trait WebBrowserAdapter: Send + Sync {
244 async fn connect(&mut self) -> WebAdapterResult<()>;
250
251 async fn disconnect(&mut self) -> WebAdapterResult<()>;
253
254 fn is_connected(&self) -> bool;
256
257 async fn navigate(&self, url: &str, options: NavigateOptions) -> WebAdapterResult<PageHandle>;
263
264 async fn wait_for_load(&self, handle: &PageHandle, timeout: Duration) -> WebAdapterResult<()>;
266
267 async fn go_back(&self, handle: &PageHandle) -> WebAdapterResult<()>;
269
270 async fn go_forward(&self, handle: &PageHandle) -> WebAdapterResult<()>;
272
273 async fn reload(&self, handle: &PageHandle) -> WebAdapterResult<()>;
275
276 async fn extract_content(
282 &self,
283 handle: &PageHandle,
284 options: ExtractOptions,
285 ) -> WebAdapterResult<ExtractedContent>;
286
287 async fn extract_links(&self, handle: &PageHandle) -> WebAdapterResult<Vec<Link>>;
289
290 async fn extract_structured(
292 &self,
293 handle: &PageHandle,
294 selector: &str,
295 ) -> WebAdapterResult<Value>;
296
297 async fn get_html(&self, handle: &PageHandle) -> WebAdapterResult<String>;
299
300 async fn capture_screenshot(
306 &self,
307 handle: &PageHandle,
308 options: CaptureOptions,
309 ) -> WebAdapterResult<CapturedPage>;
310
311 async fn capture_pdf(&self, handle: &PageHandle) -> WebAdapterResult<Vec<u8>>;
313
314 async fn click(&self, handle: &PageHandle, selector: &str) -> WebAdapterResult<()>;
320
321 async fn type_text(
323 &self,
324 handle: &PageHandle,
325 selector: &str,
326 text: &str,
327 ) -> WebAdapterResult<()>;
328
329 async fn select_option(
331 &self,
332 handle: &PageHandle,
333 selector: &str,
334 value: &str,
335 ) -> WebAdapterResult<()>;
336
337 async fn scroll(&self, handle: &PageHandle, x: f64, y: f64) -> WebAdapterResult<()>;
339
340 async fn wait_for_selector(
342 &self,
343 handle: &PageHandle,
344 selector: &str,
345 timeout: Duration,
346 ) -> WebAdapterResult<()>;
347
348 async fn evaluate_js(&self, handle: &PageHandle, script: &str) -> WebAdapterResult<Value>;
354
355 async fn inject_script(&self, handle: &PageHandle, script: &str) -> WebAdapterResult<()>;
357
358 async fn get_cookies(&self, handle: &PageHandle) -> WebAdapterResult<Vec<Cookie>>;
364
365 async fn set_cookie(&self, handle: &PageHandle, cookie: Cookie) -> WebAdapterResult<()>;
367
368 async fn clear_cookies(&self, handle: &PageHandle) -> WebAdapterResult<()>;
370
371 async fn get_local_storage(
373 &self,
374 handle: &PageHandle,
375 key: &str,
376 ) -> WebAdapterResult<Option<String>>;
377
378 async fn set_local_storage(
380 &self,
381 handle: &PageHandle,
382 key: &str,
383 value: &str,
384 ) -> WebAdapterResult<()>;
385}
386
387#[derive(Debug, Clone, Serialize, Deserialize)]
389pub struct Cookie {
390 pub name: String,
391 pub value: String,
392 pub domain: Option<String>,
393 pub path: Option<String>,
394 pub expires: Option<i64>,
395 pub http_only: bool,
396 pub secure: bool,
397 pub same_site: Option<String>,
398}