Skip to main content

ferrous_browser/
browser.rs

1use crate::cdp::{spawn_writer_task, CDPClient};
2use crate::connection::Connection;
3use crate::error::{BrowserError, Result};
4use crate::page::Page;
5use serde_json::json;
6use std::net::TcpListener;
7use std::process::Stdio;
8use std::sync::Arc;
9use std::time::Duration;
10use tokio::io::{AsyncBufReadExt, BufReader};
11use tokio::process::{Child, Command};
12use tokio::sync::RwLock;
13use tracing::Instrument;
14
15// ── P4: BrowserConfig ────────────────────────────────────────────────────────
16
/// Launch options for a Chrome/Chromium instance.
///
/// Start from [`BrowserConfig::default()`] and override only the fields you
/// care about (struct-update syntax works well for this). The type is
/// comparable with `==`, which makes configurations easy to assert on in
/// tests.
///
/// # Example
///
/// ```no_run
/// use ferrous_browser::{Browser, BrowserConfig};
/// use std::time::Duration;
///
/// # #[tokio::main]
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let config = BrowserConfig {
///     headless: true,
///     timeout: Duration::from_secs(60),
///     viewport: (1920, 1080),
///     args: vec!["--disable-extensions".to_string()],
/// };
/// let browser = Browser::launch_chrome(Some(config)).await?;
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BrowserConfig {
    /// Run Chrome in headless mode (default: `true`).
    pub headless: bool,
    /// Maximum time to wait for Chrome to start (default: 30 s).
    pub timeout: Duration,
    /// Viewport size as `(width, height)` in logical pixels (default: `1280 x 720`).
    pub viewport: (u32, u32),
    /// Additional Chrome command-line arguments appended after the built-in flags.
    pub args: Vec<String>,
}

impl Default for BrowserConfig {
    /// Headless Chrome, a 30 second start-up timeout, a 1280 x 720 viewport
    /// and no extra command-line arguments.
    fn default() -> Self {
        Self {
            headless: true,
            timeout: Duration::from_secs(30),
            viewport: (1280, 720),
            args: Vec::new(),
        }
    }
}
62
63// ── Browser ──────────────────────────────────────────────────────────────────
64
/// A handle to a Chrome/Chromium browser instance.
///
/// A `Browser` is obtained either by launching a new Chrome process
/// ([`Browser::launch_chrome`]) or by attaching to an existing DevTools
/// endpoint ([`Browser::connect`]). Dropping the handle kills the Chrome
/// process if this handle launched it.
///
/// # Example
///
/// ```no_run
/// use ferrous_browser::{Browser, WaitUntil};
///
/// #[tokio::main]
/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
///     let browser = Browser::launch_chrome(None).await?;
///     let page = browser.new_page().await?;
///     page.goto("https://example.com", WaitUntil::Load).await?;
///     Ok(())
/// }
/// ```
pub struct Browser {
    /// Shared CDP client used to send commands and subscribe to events.
    cdp: Arc<CDPClient>,
    /// Every page created through [`Browser::new_page`], in creation order.
    pages: Arc<RwLock<Vec<Page>>>,
    /// The Chrome process when it was spawned by [`Browser::launch_chrome`];
    /// `None` when attached to an already-running browser.
    _child: Option<Child>,
}
85
86impl Browser {
87    fn find_chrome() -> Option<String> {
88        #[cfg(target_os = "macos")]
89        let candidates: &[&str] = &[
90            "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
91            "/Applications/Chromium.app/Contents/MacOS/Chromium",
92            "google-chrome",
93            "chromium-browser",
94            "chromium",
95        ];
96        #[cfg(target_os = "linux")]
97        let candidates: &[&str] = &[
98            "google-chrome",
99            "google-chrome-stable",
100            "chromium-browser",
101            "chromium",
102        ];
103        #[cfg(target_os = "windows")]
104        let candidates: &[&str] = &[
105            "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
106            "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
107            "chrome",
108        ];
109        #[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))]
110        let candidates: &[&str] = &["google-chrome", "chromium-browser", "chromium"];
111
112        for candidate in candidates {
113            if std::path::Path::new(candidate).exists() || which::which(candidate).is_ok() {
114                return Some(candidate.to_string());
115            }
116        }
117        None
118    }
119
120    /// Pick a free TCP port on localhost.
121    fn free_port() -> Result<u16> {
122        TcpListener::bind("127.0.0.1:0")
123            .map(|l| l.local_addr().unwrap().port())
124            .map_err(|e| {
125                BrowserError::BrowserNotLaunched(format!("Could not find a free port: {e}"))
126            })
127    }
128
129    /// Launch Chrome/Chromium and connect to it automatically.
130    ///
131    /// Pass `None` to use [`BrowserConfig::default`].
132    ///
133    /// # Example
134    ///
135    /// ```no_run
136    /// use ferrous_browser::{Browser, BrowserConfig};
137    ///
138    /// # #[tokio::main]
139    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
140    /// let browser = Browser::launch_chrome(None).await?;
141    ///
142    /// let config = BrowserConfig { headless: false, ..Default::default() };
143    /// let browser = Browser::launch_chrome(Some(config)).await?;
144    /// # Ok(())
145    /// # }
146    /// ```
147    pub async fn launch_chrome(config: Option<BrowserConfig>) -> Result<Self> {
148        let span = tracing::info_span!("Browser::launch_chrome");
149        let _enter = span.enter();
150        let config = config.unwrap_or_default();
151
152        let chrome_path = Self::find_chrome().ok_or_else(|| {
153            BrowserError::BrowserNotLaunched(
154                "Chrome/Chromium not found. Install Google Chrome or set a custom path via BrowserConfig::args.".to_string(),
155            )
156        })?;
157
158        // Use a dynamically-assigned free port so multiple instances never conflict
159        let port = Self::free_port()?;
160
161        let mut chrome_args: Vec<String> = vec![
162            format!("--remote-debugging-port={port}"),
163            "--no-sandbox".to_string(),
164            "--disable-gpu".to_string(),
165            "--disable-dev-shm-usage".to_string(),
166            format!("--window-size={},{}", config.viewport.0, config.viewport.1),
167        ];
168        if config.headless {
169            chrome_args.push("--headless=new".to_string());
170        }
171        chrome_args.extend(config.args.iter().cloned());
172
173        let mut child = tracing::info_span!("spawn_chrome").in_scope(|| {
174            Command::new(&chrome_path)
175                .args(&chrome_args)
176                .stderr(Stdio::piped())
177                .stdout(Stdio::null())
178                .stdin(Stdio::null())
179                // We manage Chrome's lifetime via SIGTERM in Drop, matching
180                // the prior behavior, so don't let tokio kill it on drop.
181                .kill_on_drop(false)
182                .spawn()
183                .map_err(|e| {
184                    BrowserError::BrowserNotLaunched(format!(
185                        "Failed to spawn Chrome at '{chrome_path}': {e}"
186                    ))
187                })
188        })?;
189
190        let pid = child.id().ok_or_else(|| {
191            BrowserError::BrowserNotLaunched("Chrome exited before reporting a pid".to_string())
192        })?;
193        tracing::debug!(pid, "Chrome launched successfully");
194
195        // Chrome announces readiness on stderr as soon as the devtools server
196        // is listening:
197        //     DevTools listening on ws://127.0.0.1:<port>/devtools/browser/<id>
198        // Read that and skip the HTTP-poll-and-200ms-sleep dance entirely.
199        let stderr = child.stderr.take().expect("stderr is piped");
200
201        let ws_url =
202            tokio::time::timeout(config.timeout, async {
203                let mut reader = BufReader::new(stderr).lines();
204                while let Some(line) = reader.next_line().await.map_err(|e| {
205                    BrowserError::BrowserNotLaunched(format!("stderr read failed: {e}"))
206                })? {
207                    const PREFIX: &str = "DevTools listening on ";
208                    if let Some(idx) = line.find(PREFIX) {
209                        let url = line[idx + PREFIX.len()..].trim().to_string();
210                        // Keep draining stderr so the pipe never fills up and
211                        // blocks Chrome on a future write.
212                        tokio::spawn(async move {
213                            let mut reader = reader;
214                            while let Ok(Some(_)) = reader.next_line().await {}
215                        });
216                        return Ok::<String, BrowserError>(url);
217                    }
218                }
219                Err(BrowserError::BrowserNotLaunched(
220                    "Chrome exited before announcing its DevTools port".to_string(),
221                ))
222            })
223            .instrument(tracing::info_span!("wait_for_chrome_ready"))
224            .await
225            .map_err(|_| {
226                BrowserError::BrowserNotLaunched(format!(
227                    "Chrome did not start within {}s",
228                    config.timeout.as_secs()
229                ))
230            })??;
231
232        Self::connect_internal(ws_url, Some(child)).await
233    }
234
235    /// Connect to a CDP WebSocket URL directly.
236    ///
237    /// # Example
238    ///
239    /// ```no_run
240    /// use ferrous_browser::Browser;
241    ///
242    /// # #[tokio::main]
243    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
244    /// let browser = Browser::connect("ws://localhost:9222".to_string()).await?;
245    /// # Ok(())
246    /// # }
247    /// ```
248    pub async fn connect(ws_url: String) -> Result<Self> {
249        Self::connect_internal(ws_url, None).await
250    }
251
252    /// Connect to a Chrome instance already running on `localhost:9222`.
253    ///
254    /// # Example
255    ///
256    /// ```no_run
257    /// use ferrous_browser::Browser;
258    ///
259    /// # #[tokio::main]
260    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
261    /// let browser = Browser::launch().await?;
262    /// # Ok(())
263    /// # }
264    /// ```
265    pub async fn launch() -> Result<Self> {
266        Self::connect("ws://localhost:9222".to_string()).await
267    }
268
269    async fn connect_internal(ws_url: String, child: Option<Child>) -> Result<Self> {
270        use futures_util::StreamExt;
271        let cdp = Arc::new(CDPClient::new(ws_url));
272        let ws_stream = cdp.connect().await?;
273        let (sink, stream) = ws_stream.split();
274
275        let (tx, rx) = tokio::sync::mpsc::unbounded_channel();
276        cdp.set_writer(tx);
277        spawn_writer_task(sink, rx, cdp.clone());
278
279        let conn = Connection::new(cdp.clone(), stream);
280        tokio::spawn(conn.run());
281
282        // Enable auto-attach so new targets connect instantly without round-trip
283        cdp.send_command(
284            "Target.setAutoAttach".to_string(),
285            Some(json!({
286                "autoAttach": true,
287                "waitForDebuggerOnStart": false,
288                "flatten": true
289            })),
290        )
291        .await?;
292
293        Ok(Browser {
294            cdp,
295            pages: Arc::new(RwLock::new(Vec::new())),
296            _child: child,
297        })
298    }
299
300    /// Create a new page/tab in the browser.
301    #[tracing::instrument(level = "info", skip(self))]
302    pub async fn new_page(&self) -> Result<Page> {
303        // Subscribe to events BEFORE creating target so we don't miss attachedToTarget
304        let mut event_rx = self.cdp.subscribe_events();
305
306        let target_response = self
307            .cdp
308            .send_command(
309                "Target.createTarget".to_string(),
310                Some(json!({ "url": "about:blank" })),
311            )
312            .await?;
313
314        let target_id = target_response
315            .get("targetId")
316            .and_then(|v| v.as_str())
317            .ok_or_else(|| {
318                BrowserError::invalid_response(
319                    "new_page()",
320                    "missing targetId in Target.createTarget response",
321                )
322            })?
323            .to_string();
324
325        // Wait for the automatic Target.attachedToTarget event for this targetId
326        let target_id_for_span = target_id.clone();
327        let session_id = async {
328            loop {
329                match event_rx.recv().await {
330                    Ok(msg) if msg.method.as_deref() == Some("Target.attachedToTarget") => {
331                        if let Some(params) = msg.params {
332                            let msg_target_id = params
333                                .get("targetInfo")
334                                .and_then(|t| t.get("targetId"))
335                                .and_then(|t| t.as_str());
336                            if msg_target_id == Some(&target_id) {
337                                if let Some(sess_id) =
338                                    params.get("sessionId").and_then(|s| s.as_str())
339                                {
340                                    return Ok::<String, BrowserError>(sess_id.to_string());
341                                }
342                            }
343                        }
344                    }
345                    Ok(_) => {} // ignore other events
346                    Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => {}
347                    Err(_) => {
348                        return Err(BrowserError::invalid_response(
349                            "new_page()",
350                            "event channel closed before Target.attachedToTarget",
351                        ));
352                    }
353                }
354            }
355        }
356        .instrument(tracing::info_span!(
357            "await_attachedToTarget",
358            target_id = %target_id_for_span
359        ))
360        .await?;
361
362        let page = Page::new(target_id, session_id, self.cdp.clone());
363        self.pages.write().await.push(page.clone());
364        Ok(page)
365    }
366
367    /// Get the number of open pages/tabs.
368    pub async fn page_count(&self) -> usize {
369        self.pages.read().await.len()
370    }
371}
372
373impl Drop for Browser {
374    fn drop(&mut self) {
375        if let Some(child) = self._child.as_mut() {
376            let _ = child.start_kill().map_err(|e| {
377                tracing::warn!(error = %e, "Failed to kill Chrome process");
378            });
379        }
380    }
381}
382
#[cfg(test)]
mod tests {
    use super::*;

    /// `BrowserConfig::default()` yields the documented default values.
    #[test]
    fn test_browser_config_defaults() {
        let BrowserConfig {
            headless,
            timeout,
            viewport,
            args,
        } = BrowserConfig::default();

        assert!(headless);
        assert_eq!(viewport, (1280, 720));
        assert_eq!(timeout, Duration::from_secs(30));
        assert!(args.is_empty());
    }

    /// Explicitly supplied values are stored unchanged.
    #[test]
    fn test_browser_config_custom() {
        let BrowserConfig {
            headless,
            timeout,
            viewport,
            args,
        } = BrowserConfig {
            headless: false,
            timeout: Duration::from_secs(60),
            viewport: (1920, 1080),
            args: vec!["--disable-extensions".to_string()],
        };

        assert!(!headless);
        assert_eq!(viewport, (1920, 1080));
        assert_eq!(timeout, Duration::from_secs(60));
        assert_eq!(args, vec!["--disable-extensions"]);
    }

    /// The OS hands out a port outside the privileged range.
    #[test]
    fn test_free_port() {
        let port = Browser::free_port().unwrap();
        assert!(port > 1024);
    }
}
415}