Skip to main content

roboticus_browser/
lib.rs

1//! # roboticus-browser
2//!
3//! Headless browser automation via Chrome DevTools Protocol (CDP) for the
4//! Roboticus agent runtime. Provides a high-level [`Browser`] facade that
5//! manages a Chromium process, establishes a CDP WebSocket session, and
6//! exposes 12 browser actions (navigate, click, type, screenshot, etc.).
7//!
8//! ## Key Types
9//!
10//! - [`Browser`] -- High-level facade combining process, CDP session, and actions
11//! - [`SharedBrowser`] -- `Arc<Browser>` alias for thread-safe sharing
12//! - [`PageInfo`] -- Page metadata (id, url, title)
13//! - [`ScreenshotResult`] -- Base64 screenshot with format and dimensions
14//! - [`PageContent`] -- Extracted page text content
15//!
16//! ## Modules
17//!
18//! - `actions` -- `BrowserAction` enum (12 variants), `ActionExecutor`, `ActionResult`
19//! - `cdp` -- Low-level CDP HTTP client for target listing
20//! - `manager` -- Chrome/Chromium process lifecycle (start, stop, detect)
21//! - `session` -- CDP WebSocket session (connect, send command, close)
22
23pub mod actions;
24pub mod agent_browser_backend;
25pub mod backend;
26pub mod cdp;
27pub mod manager;
28pub mod session;
29
30pub use roboticus_core::config::BrowserConfig;
31
32use serde::{Deserialize, Serialize};
33
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct PageInfo {
36    pub id: String,
37    pub url: String,
38    pub title: String,
39}
40
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct ScreenshotResult {
43    pub data_base64: String,
44    pub format: String,
45    pub width: u32,
46    pub height: u32,
47}
48
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct PageContent {
51    pub url: String,
52    pub title: String,
53    pub text: String,
54    pub html_length: usize,
55}
56
57use std::sync::Arc;
58use tokio::sync::RwLock;
59use tracing::warn;
60
61use roboticus_core::Result;
62
63/// High-level browser facade combining process management, CDP control, and action execution.
64pub struct Browser {
65    config: BrowserConfig,
66    manager: RwLock<manager::BrowserManager>,
67    session: RwLock<Option<session::CdpSession>>,
68}
69
70impl Browser {
71    pub fn new(config: BrowserConfig) -> Self {
72        let mgr = manager::BrowserManager::new(config.clone());
73        Self {
74            config,
75            manager: RwLock::new(mgr),
76            session: RwLock::new(None),
77        }
78    }
79
80    pub async fn start(&self) -> Result<()> {
81        let mut mgr = self.manager.write().await;
82        mgr.start().await?;
83
84        let cdp = cdp::CdpClient::new(self.config.cdp_port)?;
85
86        let mut attempts = 0;
87        let targets = loop {
88            match cdp.list_targets().await {
89                Ok(t) if !t.is_empty() => break t,
90                _ if attempts < 10 => {
91                    attempts += 1;
92                    tokio::time::sleep(std::time::Duration::from_millis(300)).await;
93                }
94                Ok(_) => {
95                    return Err(roboticus_core::RoboticusError::Tool {
96                        tool: "browser".into(),
97                        message: "no CDP targets available after startup".into(),
98                    });
99                }
100                Err(e) => return Err(e),
101            }
102        };
103
104        let ws_url = targets
105            .iter()
106            .find(|t| t.target_type == "page")
107            .and_then(|t| t.ws_url.clone())
108            .ok_or_else(|| roboticus_core::RoboticusError::Tool {
109                tool: "browser".into(),
110                message: "no page target with WebSocket URL found".into(),
111            })?;
112
113        let sess = session::CdpSession::connect(&ws_url).await?;
114        sess.send_command("Page.enable", serde_json::json!({}))
115            .await?;
116        sess.send_command("DOM.enable", serde_json::json!({}))
117            .await?;
118        sess.send_command("Network.enable", serde_json::json!({}))
119            .await?;
120        sess.send_command("Runtime.enable", serde_json::json!({}))
121            .await?;
122
123        *self.session.write().await = Some(sess);
124        Ok(())
125    }
126
127    pub async fn stop(&self) -> Result<()> {
128        if let Some(sess) = self.session.write().await.take() {
129            let _ = sess.close().await;
130        }
131        self.manager.write().await.stop().await
132    }
133
134    pub async fn is_running(&self) -> bool {
135        self.manager.read().await.is_running()
136    }
137
138    pub async fn execute_action(&self, action: &actions::BrowserAction) -> actions::ActionResult {
139        let initial = {
140            let session_guard = self.session.read().await;
141            match session_guard.as_ref() {
142                Some(sess) => actions::ActionExecutor::execute(sess, action).await,
143                None => {
144                    return actions::ActionResult::err(
145                        &format!("{:?}", action),
146                        "browser not started".into(),
147                    );
148                }
149            }
150        };
151
152        if initial.success
153            || !should_attempt_session_recovery(action, &initial)
154            || !is_idempotent_recovery_action(action)
155        {
156            return initial;
157        }
158
159        if let Err(err) = self.recover_session().await {
160            warn!(error = %err, "browser session recovery failed");
161            return actions::ActionResult::err(
162                &format!("{:?}", action),
163                format!(
164                    "browser session recovery failed: {err}; original error: {}",
165                    initial
166                        .error
167                        .unwrap_or_else(|| "unknown browser error".to_string())
168                ),
169            );
170        }
171
172        let session_guard = self.session.read().await;
173        match session_guard.as_ref() {
174            Some(sess) => actions::ActionExecutor::execute(sess, action).await,
175            None => actions::ActionResult::err(
176                &format!("{:?}", action),
177                "browser session unavailable after recovery".into(),
178            ),
179        }
180    }
181
182    pub fn cdp_port(&self) -> u16 {
183        self.config.cdp_port
184    }
185
186    async fn recover_session(&self) -> Result<()> {
187        let _ = self.stop().await;
188        self.start().await
189    }
190}
191
192fn should_attempt_session_recovery(
193    _action: &actions::BrowserAction,
194    result: &actions::ActionResult,
195) -> bool {
196    if result.success {
197        return false;
198    }
199    let Some(err) = result.error.as_deref() else {
200        return false;
201    };
202    let e = err.to_ascii_lowercase();
203    if e.contains("not started") {
204        return false;
205    }
206    e.contains("websocket")
207        || e.contains("closed")
208        || e.contains("connection reset")
209        || e.contains("broken pipe")
210        || e.contains("cdp read error")
211        || e.contains("cdp send failed")
212}
213
214fn is_idempotent_recovery_action(action: &actions::BrowserAction) -> bool {
215    matches!(
216        action,
217        actions::BrowserAction::Navigate { .. }
218            | actions::BrowserAction::Screenshot
219            | actions::BrowserAction::Pdf
220            | actions::BrowserAction::Evaluate { .. }
221            | actions::BrowserAction::GetCookies
222            | actions::BrowserAction::ReadPage
223            | actions::BrowserAction::GoBack
224            | actions::BrowserAction::GoForward
225            | actions::BrowserAction::Reload
226    )
227}
228
/// Shared-ownership alias: an [`Arc`] around a [`Browser`], so one browser
/// instance can be handed to several owners.
///
/// This is a plain type alias, not a separate wrapper type.
pub type SharedBrowser = Arc<Browser>;
231
#[cfg(test)]
mod tests {
    use super::*;

    /// Facade built from `BrowserConfig::default()`; `start` is never called on it.
    fn unstarted_browser() -> Browser {
        Browser::new(BrowserConfig::default())
    }

    /// `Navigate` action pointing at the example URL used throughout these tests.
    fn navigate_to_example() -> actions::BrowserAction {
        actions::BrowserAction::Navigate {
            url: "https://example.com".into(),
        }
    }

    /// Runs `action` on `browser`, asserts that it failed with an error, and
    /// hands the result back for further checks.
    async fn expect_failure(
        browser: &Browser,
        action: &actions::BrowserAction,
    ) -> actions::ActionResult {
        let outcome = browser.execute_action(action).await;
        assert!(
            !outcome.success,
            "action {:?} should fail without session",
            action
        );
        assert!(outcome.error.is_some());
        outcome
    }

    // --- configuration and plain data types ---------------------------------

    #[test]
    fn browser_config_defaults() {
        let defaults = BrowserConfig::default();
        assert!(!defaults.enabled);
        assert!(defaults.headless);
        assert_eq!(defaults.cdp_port, 9222);
        assert!(defaults.executable_path.is_none());
    }

    #[test]
    fn page_info_serde() {
        let original = PageInfo {
            id: "page1".into(),
            url: "https://example.com".into(),
            title: "Example".into(),
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: PageInfo = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.id, "page1");
        assert_eq!(decoded.url, "https://example.com");
    }

    #[test]
    fn screenshot_result_serde() {
        let original = ScreenshotResult {
            data_base64: "abc123".into(),
            format: "png".into(),
            width: 1920,
            height: 1080,
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: ScreenshotResult = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.width, 1920);
    }

    // --- facade behavior before `start` --------------------------------------

    #[test]
    fn browser_facade_creation() {
        assert_eq!(unstarted_browser().cdp_port(), 9222);
    }

    #[tokio::test]
    async fn browser_not_running_initially() {
        let browser = unstarted_browser();
        assert!(!browser.is_running().await);
    }

    #[tokio::test]
    async fn execute_action_without_start_returns_error() {
        let browser = unstarted_browser();
        let outcome = browser
            .execute_action(&actions::BrowserAction::Screenshot)
            .await;
        assert!(!outcome.success);
        assert!(outcome.error.as_deref().unwrap().contains("not started"));
    }

    #[tokio::test]
    async fn navigate_without_browser_returns_error_not_panic() {
        let browser = unstarted_browser();
        let outcome = browser.execute_action(&navigate_to_example()).await;
        assert!(
            !outcome.success,
            "navigate should fail when browser isn't started"
        );
        assert!(outcome.error.is_some());
        assert!(outcome.data.is_none());
    }

    #[tokio::test]
    async fn all_actions_return_error_without_session() {
        let browser = unstarted_browser();
        let actions_under_test = [
            navigate_to_example(),
            actions::BrowserAction::Click {
                selector: "#btn".into(),
            },
            actions::BrowserAction::Type {
                selector: "input".into(),
                text: "hello".into(),
            },
            actions::BrowserAction::Screenshot,
            actions::BrowserAction::Evaluate {
                expression: "1+1".into(),
            },
            actions::BrowserAction::ReadPage,
            actions::BrowserAction::Reload,
        ];
        for action in &actions_under_test {
            expect_failure(&browser, action).await;
        }
    }

    #[tokio::test]
    async fn all_12_actions_return_error_without_session() {
        let browser = unstarted_browser();
        let actions_under_test = [
            navigate_to_example(),
            actions::BrowserAction::Click {
                selector: "#btn".into(),
            },
            actions::BrowserAction::Type {
                selector: "input".into(),
                text: "hello".into(),
            },
            actions::BrowserAction::Screenshot,
            actions::BrowserAction::Pdf,
            actions::BrowserAction::Evaluate {
                expression: "1+1".into(),
            },
            actions::BrowserAction::GetCookies,
            actions::BrowserAction::ClearCookies,
            actions::BrowserAction::ReadPage,
            actions::BrowserAction::GoBack,
            actions::BrowserAction::GoForward,
            actions::BrowserAction::Reload,
        ];
        for action in &actions_under_test {
            let outcome = expect_failure(&browser, action).await;
            assert!(
                outcome.error.as_deref().unwrap().contains("not started"),
                "error should mention 'not started' for {:?}: {:?}",
                action,
                outcome.error
            );
        }
    }

    // --- recovery heuristics --------------------------------------------------

    #[test]
    fn session_recovery_detection_for_disconnect_signatures() {
        let action = navigate_to_example();

        let dropped_connection =
            actions::ActionResult::err("Navigate", "CDP WebSocket closed unexpectedly".into());
        assert!(should_attempt_session_recovery(&action, &dropped_connection));

        let never_started = actions::ActionResult::err("Navigate", "browser not started".into());
        assert!(!should_attempt_session_recovery(&action, &never_started));
    }

    #[test]
    fn session_recovery_detection_ignores_policy_errors() {
        let action = navigate_to_example();
        let policy_denial = actions::ActionResult::err(
            "Navigate",
            "URL scheme is blocked for security: file:///etc/passwd".into(),
        );
        assert!(
            !should_attempt_session_recovery(&action, &policy_denial),
            "security/policy denials should not trigger recovery loops"
        );
    }

    #[test]
    fn session_recovery_replay_is_limited_to_idempotent_actions() {
        let read_page = actions::BrowserAction::ReadPage;
        assert!(is_idempotent_recovery_action(&read_page));

        let click = actions::BrowserAction::Click {
            selector: "#submit".into(),
        };
        assert!(!is_idempotent_recovery_action(&click));

        let typing = actions::BrowserAction::Type {
            selector: "input".into(),
            text: "abc".into(),
        };
        assert!(!is_idempotent_recovery_action(&typing));

        let clear_cookies = actions::BrowserAction::ClearCookies;
        assert!(!is_idempotent_recovery_action(&clear_cookies));
    }

    // --- remaining data-type and lifecycle checks ------------------------------

    #[test]
    fn page_content_serde() {
        let original = PageContent {
            url: "https://example.com".into(),
            title: "Example".into(),
            text: "Hello world".into(),
            html_length: 1234,
        };
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: PageContent = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.url, "https://example.com");
        assert_eq!(decoded.title, "Example");
        assert_eq!(decoded.text, "Hello world");
        assert_eq!(decoded.html_length, 1234);
    }

    #[test]
    fn browser_custom_config() {
        let custom = BrowserConfig {
            enabled: true,
            headless: false,
            cdp_port: 9333,
            ..Default::default()
        };
        assert_eq!(Browser::new(custom).cdp_port(), 9333);
    }

    #[tokio::test]
    async fn stop_without_start_is_ok() {
        let browser = unstarted_browser();
        let outcome = browser.stop().await;
        assert!(outcome.is_ok());
    }

    #[tokio::test]
    async fn shared_browser_type() {
        let shared: SharedBrowser = Arc::new(unstarted_browser());
        assert_eq!(shared.cdp_port(), 9222);
        assert!(!shared.is_running().await);
    }

    #[test]
    fn screenshot_result_fields() {
        let shot = ScreenshotResult {
            data_base64: "iVBORw0KGgo=".into(),
            format: "png".into(),
            width: 800,
            height: 600,
        };
        assert_eq!(shot.format, "png");
        assert_eq!(shot.width, 800);
        assert_eq!(shot.height, 600);
        assert!(!shot.data_base64.is_empty());
    }

    #[test]
    fn page_info_debug_and_clone() {
        let original = PageInfo {
            id: "p1".into(),
            url: "https://example.com".into(),
            title: "Test".into(),
        };
        let copy = original.clone();
        assert_eq!(copy.id, "p1");
        let rendered = format!("{:?}", original);
        assert!(rendered.contains("p1"));
    }
}