Skip to main content

roboticus_browser/
lib.rs

//! # roboticus-browser
//!
//! Headless browser automation via the Chrome DevTools Protocol (CDP) for the
//! Roboticus agent runtime. Provides a high-level [`Browser`] facade that
//! manages a Chromium process, establishes a CDP WebSocket session, and
//! exposes 12 browser actions (navigate, click, type, screenshot, etc.).
//!
//! ## Key Types
//!
//! - [`Browser`] -- High-level facade combining process, CDP session, and actions
//! - [`SharedBrowser`] -- `Arc<Browser>` alias for thread-safe sharing
//! - [`PageInfo`] -- Page metadata (id, url, title)
//! - [`ScreenshotResult`] -- Base64 screenshot with format and dimensions
//! - [`PageContent`] -- Extracted page text content
//!
//! ## Modules
//!
//! - `actions` -- `BrowserAction` enum (12 variants), `ActionExecutor`, `ActionResult`
//! - `cdp` -- Low-level CDP HTTP client for target listing
//! - `manager` -- Chrome/Chromium process lifecycle (start, stop, detect)
//! - `session` -- CDP WebSocket session (connect, send command, close)
22
23pub mod actions;
24pub mod cdp;
25pub mod manager;
26pub mod session;
27
28pub use roboticus_core::config::BrowserConfig;
29
30use serde::{Deserialize, Serialize};
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct PageInfo {
34    pub id: String,
35    pub url: String,
36    pub title: String,
37}
38
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct ScreenshotResult {
41    pub data_base64: String,
42    pub format: String,
43    pub width: u32,
44    pub height: u32,
45}
46
47#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct PageContent {
49    pub url: String,
50    pub title: String,
51    pub text: String,
52    pub html_length: usize,
53}
54
55use std::sync::Arc;
56use tokio::sync::RwLock;
57use tracing::warn;
58
59use roboticus_core::Result;
60
61/// High-level browser facade combining process management, CDP control, and action execution.
62pub struct Browser {
63    config: BrowserConfig,
64    manager: RwLock<manager::BrowserManager>,
65    session: RwLock<Option<session::CdpSession>>,
66}
67
68impl Browser {
69    pub fn new(config: BrowserConfig) -> Self {
70        let mgr = manager::BrowserManager::new(config.clone());
71        Self {
72            config,
73            manager: RwLock::new(mgr),
74            session: RwLock::new(None),
75        }
76    }
77
78    pub async fn start(&self) -> Result<()> {
79        let mut mgr = self.manager.write().await;
80        mgr.start().await?;
81
82        let cdp = cdp::CdpClient::new(self.config.cdp_port)?;
83
84        let mut attempts = 0;
85        let targets = loop {
86            match cdp.list_targets().await {
87                Ok(t) if !t.is_empty() => break t,
88                _ if attempts < 10 => {
89                    attempts += 1;
90                    tokio::time::sleep(std::time::Duration::from_millis(300)).await;
91                }
92                Ok(_) => {
93                    return Err(roboticus_core::RoboticusError::Tool {
94                        tool: "browser".into(),
95                        message: "no CDP targets available after startup".into(),
96                    });
97                }
98                Err(e) => return Err(e),
99            }
100        };
101
102        let ws_url = targets
103            .iter()
104            .find(|t| t.target_type == "page")
105            .and_then(|t| t.ws_url.clone())
106            .ok_or_else(|| roboticus_core::RoboticusError::Tool {
107                tool: "browser".into(),
108                message: "no page target with WebSocket URL found".into(),
109            })?;
110
111        let sess = session::CdpSession::connect(&ws_url).await?;
112        sess.send_command("Page.enable", serde_json::json!({}))
113            .await?;
114        sess.send_command("DOM.enable", serde_json::json!({}))
115            .await?;
116        sess.send_command("Network.enable", serde_json::json!({}))
117            .await?;
118        sess.send_command("Runtime.enable", serde_json::json!({}))
119            .await?;
120
121        *self.session.write().await = Some(sess);
122        Ok(())
123    }
124
125    pub async fn stop(&self) -> Result<()> {
126        if let Some(sess) = self.session.write().await.take() {
127            let _ = sess.close().await;
128        }
129        self.manager.write().await.stop().await
130    }
131
132    pub async fn is_running(&self) -> bool {
133        self.manager.read().await.is_running()
134    }
135
136    pub async fn execute_action(&self, action: &actions::BrowserAction) -> actions::ActionResult {
137        let initial = {
138            let session_guard = self.session.read().await;
139            match session_guard.as_ref() {
140                Some(sess) => actions::ActionExecutor::execute(sess, action).await,
141                None => {
142                    return actions::ActionResult::err(
143                        &format!("{:?}", action),
144                        "browser not started".into(),
145                    );
146                }
147            }
148        };
149
150        if initial.success
151            || !should_attempt_session_recovery(action, &initial)
152            || !is_idempotent_recovery_action(action)
153        {
154            return initial;
155        }
156
157        if let Err(err) = self.recover_session().await {
158            warn!(error = %err, "browser session recovery failed");
159            return actions::ActionResult::err(
160                &format!("{:?}", action),
161                format!(
162                    "browser session recovery failed: {err}; original error: {}",
163                    initial
164                        .error
165                        .unwrap_or_else(|| "unknown browser error".to_string())
166                ),
167            );
168        }
169
170        let session_guard = self.session.read().await;
171        match session_guard.as_ref() {
172            Some(sess) => actions::ActionExecutor::execute(sess, action).await,
173            None => actions::ActionResult::err(
174                &format!("{:?}", action),
175                "browser session unavailable after recovery".into(),
176            ),
177        }
178    }
179
180    pub fn cdp_port(&self) -> u16 {
181        self.config.cdp_port
182    }
183
184    async fn recover_session(&self) -> Result<()> {
185        let _ = self.stop().await;
186        self.start().await
187    }
188}
189
190fn should_attempt_session_recovery(
191    _action: &actions::BrowserAction,
192    result: &actions::ActionResult,
193) -> bool {
194    if result.success {
195        return false;
196    }
197    let Some(err) = result.error.as_deref() else {
198        return false;
199    };
200    let e = err.to_ascii_lowercase();
201    if e.contains("not started") {
202        return false;
203    }
204    e.contains("websocket")
205        || e.contains("closed")
206        || e.contains("connection reset")
207        || e.contains("broken pipe")
208        || e.contains("cdp read error")
209        || e.contains("cdp send failed")
210}
211
212fn is_idempotent_recovery_action(action: &actions::BrowserAction) -> bool {
213    matches!(
214        action,
215        actions::BrowserAction::Navigate { .. }
216            | actions::BrowserAction::Screenshot
217            | actions::BrowserAction::Pdf
218            | actions::BrowserAction::Evaluate { .. }
219            | actions::BrowserAction::GetCookies
220            | actions::BrowserAction::ReadPage
221            | actions::BrowserAction::GoBack
222            | actions::BrowserAction::GoForward
223            | actions::BrowserAction::Reload
224    )
225}
226
/// Shared-ownership handle to a [`Browser`]: a plain `Arc<Browser>` alias for thread-safe sharing.
228pub type SharedBrowser = Arc<Browser>;
229
#[cfg(test)]
mod tests {
    use super::*;

    /// One value for each of the 12 `BrowserAction` variants used by these tests.
    fn one_of_each_action() -> Vec<actions::BrowserAction> {
        vec![
            actions::BrowserAction::Navigate {
                url: "https://example.com".into(),
            },
            actions::BrowserAction::Click {
                selector: "#btn".into(),
            },
            actions::BrowserAction::Type {
                selector: "input".into(),
                text: "hello".into(),
            },
            actions::BrowserAction::Screenshot,
            actions::BrowserAction::Pdf,
            actions::BrowserAction::Evaluate {
                expression: "1+1".into(),
            },
            actions::BrowserAction::GetCookies,
            actions::BrowserAction::ClearCookies,
            actions::BrowserAction::ReadPage,
            actions::BrowserAction::GoBack,
            actions::BrowserAction::GoForward,
            actions::BrowserAction::Reload,
        ]
    }

    #[test]
    fn browser_config_defaults() {
        let cfg = BrowserConfig::default();
        assert!(!cfg.enabled);
        assert!(cfg.headless);
        assert_eq!(cfg.cdp_port, 9222);
        assert!(cfg.executable_path.is_none());
    }

    #[test]
    fn page_info_serde() {
        let info = PageInfo {
            id: "page1".into(),
            url: "https://example.com".into(),
            title: "Example".into(),
        };
        let json = serde_json::to_string(&info).unwrap();
        let back: PageInfo = serde_json::from_str(&json).unwrap();
        // Every field must survive the round trip, not just a sample of them.
        assert_eq!(back.id, "page1");
        assert_eq!(back.url, "https://example.com");
        assert_eq!(back.title, "Example");
    }

    #[test]
    fn screenshot_result_serde() {
        let result = ScreenshotResult {
            data_base64: "abc123".into(),
            format: "png".into(),
            width: 1920,
            height: 1080,
        };
        let json = serde_json::to_string(&result).unwrap();
        let back: ScreenshotResult = serde_json::from_str(&json).unwrap();
        assert_eq!(back.data_base64, "abc123");
        assert_eq!(back.format, "png");
        assert_eq!(back.width, 1920);
        assert_eq!(back.height, 1080);
    }

    #[test]
    fn browser_facade_creation() {
        let browser = Browser::new(BrowserConfig::default());
        assert_eq!(browser.cdp_port(), 9222);
    }

    #[tokio::test]
    async fn browser_not_running_initially() {
        let browser = Browser::new(BrowserConfig::default());
        assert!(!browser.is_running().await);
    }

    #[tokio::test]
    async fn execute_action_without_start_returns_error() {
        let browser = Browser::new(BrowserConfig::default());
        let result = browser
            .execute_action(&actions::BrowserAction::Screenshot)
            .await;
        assert!(!result.success);
        assert!(result.error.as_deref().unwrap().contains("not started"));
    }

    #[tokio::test]
    async fn navigate_without_browser_returns_error_not_panic() {
        let browser = Browser::new(BrowserConfig::default());
        let action = actions::BrowserAction::Navigate {
            url: "https://example.com".into(),
        };
        let result = browser.execute_action(&action).await;
        assert!(
            !result.success,
            "navigate should fail when browser isn't started"
        );
        assert!(result.error.is_some());
        assert!(result.data.is_none());
    }

    #[tokio::test]
    async fn all_actions_return_error_without_session() {
        let browser = Browser::new(BrowserConfig::default());
        let cases = one_of_each_action();
        for action in &cases {
            let result = browser.execute_action(action).await;
            assert!(
                !result.success,
                "action {:?} should fail without session",
                action
            );
            assert!(result.error.is_some());
        }
    }

    #[tokio::test]
    async fn all_12_actions_return_error_without_session() {
        let browser = Browser::new(BrowserConfig::default());
        let cases = one_of_each_action();
        assert_eq!(cases.len(), 12, "the fixture must cover every action");
        for action in &cases {
            let result = browser.execute_action(action).await;
            assert!(
                !result.success,
                "action {:?} should fail without session",
                action
            );
            assert!(result.error.is_some());
            assert!(
                result.error.as_deref().unwrap().contains("not started"),
                "error should mention 'not started' for {:?}: {:?}",
                action,
                result.error
            );
        }
    }

    #[test]
    fn session_recovery_detection_for_disconnect_signatures() {
        let action = actions::BrowserAction::Navigate {
            url: "https://example.com".to_string(),
        };

        // One message per disconnect marker, in mixed case to exercise the
        // case-insensitive comparison.
        let recoverable_messages = [
            "CDP WebSocket closed unexpectedly",
            "connection closed",
            "Connection reset by peer",
            "Broken pipe (os error 32)",
            "CDP read error: eof",
            "CDP send failed: io",
        ];
        for message in recoverable_messages {
            let recoverable = actions::ActionResult::err("Navigate", message.to_string());
            assert!(
                should_attempt_session_recovery(&action, &recoverable),
                "{message:?} should be treated as a lost connection"
            );
        }

        let non_recoverable =
            actions::ActionResult::err("Navigate", "browser not started".to_string());
        assert!(!should_attempt_session_recovery(&action, &non_recoverable));

        // "not started" wins even when a disconnect marker is present as well.
        let never_started =
            actions::ActionResult::err("Navigate", "websocket not started".to_string());
        assert!(!should_attempt_session_recovery(&action, &never_started));
    }

    #[test]
    fn session_recovery_detection_ignores_policy_errors() {
        let action = actions::BrowserAction::Navigate {
            url: "https://example.com".to_string(),
        };
        let blocked = actions::ActionResult::err(
            "Navigate",
            "URL scheme is blocked for security: file:///etc/passwd".to_string(),
        );
        assert!(
            !should_attempt_session_recovery(&action, &blocked),
            "security/policy denials should not trigger recovery loops"
        );
    }

    #[test]
    fn session_recovery_replay_is_limited_to_idempotent_actions() {
        for action in &one_of_each_action() {
            // Only these three mutate page or browser state and must never be replayed.
            let expect_replay = !matches!(
                action,
                actions::BrowserAction::Click { .. }
                    | actions::BrowserAction::Type { .. }
                    | actions::BrowserAction::ClearCookies
            );
            assert_eq!(
                is_idempotent_recovery_action(action),
                expect_replay,
                "unexpected replay decision for {:?}",
                action
            );
        }
    }

    #[test]
    fn page_content_serde() {
        let content = PageContent {
            url: "https://example.com".into(),
            title: "Example".into(),
            text: "Hello world".into(),
            html_length: 1234,
        };
        let json = serde_json::to_string(&content).unwrap();
        let back: PageContent = serde_json::from_str(&json).unwrap();
        assert_eq!(back.url, "https://example.com");
        assert_eq!(back.title, "Example");
        assert_eq!(back.text, "Hello world");
        assert_eq!(back.html_length, 1234);
    }

    #[test]
    fn browser_custom_config() {
        let config = BrowserConfig {
            enabled: true,
            headless: false,
            cdp_port: 9333,
            ..Default::default()
        };
        let browser = Browser::new(config);
        assert_eq!(browser.cdp_port(), 9333);
    }

    #[tokio::test]
    async fn stop_without_start_is_ok() {
        let browser = Browser::new(BrowserConfig::default());
        let result = browser.stop().await;
        assert!(result.is_ok());
    }

    #[tokio::test]
    async fn shared_browser_type() {
        let browser = Browser::new(BrowserConfig::default());
        let shared: SharedBrowser = Arc::new(browser);
        assert_eq!(shared.cdp_port(), 9222);
        assert!(!shared.is_running().await);
    }

    #[test]
    fn screenshot_result_fields() {
        let result = ScreenshotResult {
            data_base64: "iVBORw0KGgo=".into(),
            format: "png".into(),
            width: 800,
            height: 600,
        };
        assert_eq!(result.format, "png");
        assert_eq!(result.width, 800);
        assert_eq!(result.height, 600);
        assert!(!result.data_base64.is_empty());
    }

    #[test]
    fn page_info_debug_and_clone() {
        let info = PageInfo {
            id: "p1".into(),
            url: "https://example.com".into(),
            title: "Test".into(),
        };
        let cloned = info.clone();
        assert_eq!(cloned.id, "p1");
        assert_eq!(cloned.url, "https://example.com");
        assert_eq!(cloned.title, "Test");
        let debug_str = format!("{:?}", info);
        assert!(debug_str.contains("p1"));
    }
}
500}