headless_chrome/browser/
mod.rs

1use std::sync::Arc;
2use std::sync::Mutex;
3use std::sync::mpsc;
4use std::sync::mpsc::{RecvTimeoutError, TryRecvError};
5use std::time::Duration;
6
7use anyhow::{Result, anyhow};
8use log::{debug, error, info, trace};
9
10use process::Process;
11pub use process::{DEFAULT_ARGS, LaunchOptions, LaunchOptionsBuilder};
12pub use tab::Tab;
13pub use transport::ConnectionClosed;
14use transport::Transport;
15use url::Url;
16use which::which;
17
18use crate::protocol::cdp::{
19    self, Browser as B, Target, Target::GetTargets, types::Event, types::Method,
20};
21
22use crate::browser::context::Context;
23use crate::util;
24use B::GetVersion;
25pub use B::GetVersionReturnObject;
26use Target::{CreateTarget, SetDiscoverTargets};
27
28#[cfg(feature = "fetch")]
29pub use fetcher::FetcherOptions;
30
31#[cfg(feature = "fetch")]
32pub use fetcher::Revision;
33
34pub mod context;
35#[cfg(feature = "fetch")]
36mod fetcher;
37mod process;
38pub mod tab;
39pub mod transport;
40
/// A handle to an instance of Chrome / Chromium, which wraps a WebSocket connection to its debugging port.
///
/// Cloning a `Browser` is cheap: all clones share the same inner state through an [`Arc`].
///
/// Most of your actual "driving" (e.g. clicking, typing, navigating) will be via instances of
/// [`Tab`], which are accessible via methods such as [`Browser::get_tabs`] and [`Browser::new_tab`].
///
/// A Browser can either manage its own Chrome process or connect to a remote one.
///
/// `Browser::default().unwrap()` will return a headless instance of whatever browser can be found using
/// [`default_executable`], which will search on your PATH for relevant binaries or use the path
/// specified in the `CHROME` env var.
///
/// You can use [`LaunchOptions`] to automatically
/// download a revision of Chromium that has a compatible API into your `$XDG_DATA_DIR`. Alternatively,
/// you can specify your own path to a binary, or make use of the [`default_executable`] function to use
/// your already-installed copy of Chrome.
///
/// Option 1: Managing a Chrome process
/// ```rust
/// # use anyhow::Result;
/// # fn main() -> Result<()> {
/// #
/// use headless_chrome::Browser;
/// let browser = Browser::default()?;
/// let first_tab = browser.new_tab()?;
/// assert_eq!("about:blank", first_tab.get_url());
/// #
/// # Ok(())
/// # }
/// ```
///
/// Option 2: Connecting to a remote Chrome service
/// - see /examples/print_to_pdf.rs for a working example
///
///
/// While the Chrome DevTools Protocol (CDP) does define some methods in a
/// ["Browser" domain](https://chromedevtools.github.io/devtools-protocol/tot/Browser)
/// (such as for resizing the window in non-headless mode), we currently don't implement those.
#[derive(Clone)]
pub struct Browser {
    // Shared state; the `Drop` impl of `BrowserInner` runs when the last clone goes away.
    inner: Arc<BrowserInner>,
}
82
/// State shared by every clone of a [`Browser`].
///
/// Its `Drop` implementation performs the shutdown sequence, so it runs exactly once, when the
/// last `Browser` handle is dropped.
pub struct BrowserInner {
    // `Some` when this crate launched Chrome itself (`Browser::new`), `None` when connected to
    // an externally started browser (`Browser::connect*`).
    process: Option<Process>,
    // Connection used for browser-level calls; also handed to every `Tab` that gets created.
    transport: Arc<Transport>,
    // Known tabs; shared with the background thread that handles browser-level events.
    tabs: Arc<Mutex<Vec<Arc<Tab>>>>,
    // Used on drop to tell the browser event loop thread to stop.
    loop_shutdown_tx: mpsc::SyncSender<()>,
    // When true, a `Browser.close` call is sent to Chrome on drop.
    close_on_drop: bool,
}
90
91impl Browser {
92    /// Launch a new Chrome browser.
93    ///
94    /// The browser will have its user data (aka "profile") directory stored in a temporary directory.
95    /// The browser process will be killed when this struct is dropped.
96    pub fn new(launch_options: LaunchOptions) -> Result<Self> {
97        let idle_browser_timeout = launch_options.idle_browser_timeout;
98        let process = Process::new(launch_options)?;
99        let process_id = process.get_id();
100
101        let transport = Arc::new(Transport::new(
102            process.debug_ws_url.clone(),
103            Some(process_id),
104            idle_browser_timeout,
105        )?);
106
107        Self::create_browser(Some(process), transport, idle_browser_timeout, true)
108    }
109
110    /// Calls [`Browser::new`] with options to launch a headless browser using whatever Chrome / Chromium
111    /// binary can be found on the system.
112    pub fn default() -> Result<Self> {
113        let launch_options = LaunchOptions::default_builder()
114            .path(Some(default_executable().map_err(|e| anyhow!(e))?))
115            .build()?;
116        Self::new(launch_options)
117    }
118
119    /// Allows you to drive an externally-launched Chrome process instead of launch one via [`Browser::new`].
120    /// If the browser is idle for 30 seconds, the connection will be dropped.
121    pub fn connect(debug_ws_url: String) -> Result<Self> {
122        Self::connect_with_timeout(debug_ws_url, Duration::from_secs(30))
123    }
124
125    /// Allows you to drive an externally-launched Chrome process instead of launch one via [`Browser::new`].
126    /// If the browser is idle for `idle_browser_timeout`, the connection will be dropped.
127    pub fn connect_with_timeout(
128        debug_ws_url: String,
129        idle_browser_timeout: Duration,
130    ) -> Result<Self> {
131        let url = Url::parse(&debug_ws_url)?;
132
133        let transport = Arc::new(Transport::new(url, None, idle_browser_timeout)?);
134        trace!("created transport");
135
136        Self::create_browser(None, transport, idle_browser_timeout, false)
137    }
138
139    fn create_browser(
140        process: Option<Process>,
141        transport: Arc<Transport>,
142        idle_browser_timeout: Duration,
143        close_on_drop: bool,
144    ) -> Result<Self> {
145        let tabs = Arc::new(Mutex::new(Vec::with_capacity(1)));
146
147        let (shutdown_tx, shutdown_rx) = mpsc::sync_channel(100);
148
149        let browser = Browser {
150            inner: Arc::new(BrowserInner {
151                process,
152                tabs,
153                transport,
154                loop_shutdown_tx: shutdown_tx,
155                close_on_drop,
156            }),
157        };
158
159        let incoming_events_rx = browser.inner.transport.listen_to_browser_events();
160
161        browser.handle_browser_level_events(
162            incoming_events_rx,
163            browser.get_process_id(),
164            shutdown_rx,
165            idle_browser_timeout,
166        );
167        trace!("created browser event listener");
168
169        // so we get events like 'targetCreated' and 'targetDestroyed'
170        trace!("Calling set discover");
171        browser.call_method(SetDiscoverTargets {
172            discover: true,
173            filter: None,
174        })?;
175
176        Ok(browser)
177    }
178
179    pub fn get_process_id(&self) -> Option<u32> {
180        self.inner.process.as_ref().map(process::Process::get_id)
181    }
182
183    pub fn get_ws_url(&self) -> String {
184        match &self.inner.process {
185            None => "browser is not running".to_string(),
186            Some(process) => process.debug_ws_url.clone().to_string(),
187        }
188    }
189
    /// Returns the shared list of tabs currently known to this browser.
    ///
    /// The tabs are behind an `Arc` and `Mutex` because they're accessible from multiple threads
    /// (including the one that handles incoming protocol events about new or changed tabs).
    /// Lock the mutex to inspect the list, and keep the guard short-lived so the event thread
    /// is not held up.
    pub fn get_tabs(&self) -> &Arc<Mutex<Vec<Arc<Tab>>>> {
        &self.inner.tabs
    }
195
196    // THIS NO LONGER SEEMS TRUE |
197    //                           v
198    /// Chrome always launches with at least one tab. The reason we have to 'wait' is because information
199    /// about that tab isn't available *immediately* after starting the process. Tabs are behind `Arc`s
200    /// because they each have their own thread which handles events and method responses directed to them.
201    ///
202    /// Wait timeout: 10 secs
203    #[deprecated(since = "1.0.4", note = "Use new_tab() instead.")]
204    pub fn wait_for_initial_tab(&self) -> Result<Arc<Tab>> {
205        match util::Wait::with_timeout(Duration::from_secs(10))
206            .until(|| self.inner.tabs.lock().unwrap().first().cloned())
207        {
208            Ok(tab) => Ok(tab),
209            Err(_) => self.new_tab(),
210        }
211    }
212
213    /// Create a new tab and return a handle to it.
214    ///
215    /// If you want to specify its starting options, see `new_tab_with_options`.
216    ///
217    /// ```rust
218    /// # use anyhow::Result;
219    /// # fn main() -> Result<()> {
220    /// #
221    /// # use headless_chrome::Browser;
222    /// # let browser = Browser::default()?;
223    /// let first_tab = browser.new_tab()?;
224    /// let new_tab = browser.new_tab()?;
225    /// let num_tabs = browser.get_tabs().lock().unwrap().len();
226    /// assert_eq!(2, num_tabs);
227    /// #
228    /// # Ok(())
229    /// # }
230    /// ```
231    pub fn new_tab(&self) -> Result<Arc<Tab>> {
232        let default_blank_tab = CreateTarget {
233            url: "about:blank".to_string(),
234            width: None,
235            height: None,
236            browser_context_id: None,
237            enable_begin_frame_control: None,
238            new_window: None,
239            background: None,
240            for_tab: None,
241        };
242        self.new_tab_with_options(default_blank_tab)
243    }
244
245    /// Create a new tab with a starting url, height / width, context ID and 'frame control'
246    /// ```rust
247    /// # use anyhow::Result;
248    /// # fn main() -> Result<()> {
249    /// #
250    /// # use headless_chrome::{Browser, protocol::target::methods::CreateTarget};
251    /// # let browser = Browser::default()?;
252    ///    let new_tab = browser.new_tab_with_options(CreateTarget {
253    ///    url: "chrome://version",
254    ///    width: Some(1024),
255    ///    height: Some(800),
256    ///    browser_context_id: None,
257    ///    enable_begin_frame_control: None,
258    ///    })?;
259    /// #
260    /// # Ok(())
261    /// # }
262    /// ```
263    pub fn new_tab_with_options(&self, create_target_params: CreateTarget) -> Result<Arc<Tab>> {
264        let target_id = self.call_method(create_target_params)?.target_id;
265
266        util::Wait::with_timeout(Duration::from_secs(20))
267            .until(|| {
268                let tabs = self.inner.tabs.lock().unwrap();
269                tabs.iter().find_map(|tab| {
270                    if *tab.get_target_id() == target_id {
271                        Some(tab.clone())
272                    } else {
273                        None
274                    }
275                })
276            })
277            .map_err(Into::into)
278    }
279
280    /// Creates the equivalent of a new incognito window, AKA a browser context
281    pub fn new_context(&self) -> Result<context::Context<'_>> {
282        debug!("Creating new browser context");
283        let context_id = self
284            .call_method(Target::CreateBrowserContext {
285                dispose_on_detach: None,
286                proxy_server: None,
287                proxy_bypass_list: None,
288                origins_with_universal_network_access: None,
289            })?
290            .browser_context_id;
291        debug!("Created new browser context: {context_id:?}");
292        Ok(Context::new(self, context_id))
293    }
294
295    /// Adds tabs that have not been opened with new_tab to the list of tabs
296    pub fn register_missing_tabs(&self) {
297        let targets = self.call_method(GetTargets { filter: None });
298
299        let mut tabs_lock = self.inner.tabs.lock().unwrap();
300        let mut previous_target_id: String = String::default();
301        for target in targets.unwrap().target_infos {
302            let target_id = target.target_id.clone();
303
304            if tabs_lock
305                .iter()
306                .any(|t| t.get_target_id().clone() == target_id || !target.attached)
307            {
308                previous_target_id = target.target_id;
309                continue;
310            }
311
312            let tab = Tab::new(target, self.inner.transport.clone());
313            if let Ok(tab) = tab {
314                if let Some(index) = tabs_lock
315                    .iter()
316                    .position(|x| x.get_target_id().clone() == previous_target_id)
317                {
318                    tabs_lock.insert(index, Arc::new(tab));
319                } else {
320                    tabs_lock.push(Arc::new(tab));
321                }
322            }
323
324            previous_target_id = target_id;
325        }
326    }
327
    /// Get version information about the connected browser.
    ///
    /// Issues a browser-level `GetVersion` call and returns its result object.
    ///
    /// # Errors
    ///
    /// Returns an error if the underlying protocol call fails.
    ///
    /// ```rust
    /// # use anyhow::Result;
    /// # fn main() -> Result<()> {
    /// #
    /// # use headless_chrome::Browser;
    /// # let browser = Browser::default()?;
    /// let version_info = browser.get_version()?;
    /// println!("User-Agent is `{}`", version_info.user_agent);
    /// #
    /// # Ok(())
    /// # }
    /// ```
    pub fn get_version(&self) -> Result<GetVersionReturnObject> {
        self.call_method(GetVersion(None))
    }
345
346    fn handle_browser_level_events(
347        &self,
348        events_rx: mpsc::Receiver<Event>,
349        process_id: Option<u32>,
350        shutdown_rx: mpsc::Receiver<()>,
351        idle_browser_timeout: Duration,
352    ) {
353        let tabs = Arc::clone(&self.inner.tabs);
354        let transport = Arc::clone(&self.inner.transport);
355
356        std::thread::spawn(move || {
357            trace!("Starting browser's event handling loop");
358            loop {
359                match shutdown_rx.try_recv() {
360                    Ok(()) | Err(TryRecvError::Disconnected) => {
361                        info!("Browser event loop received shutdown message");
362                        break;
363                    }
364                    Err(TryRecvError::Empty) => {}
365                }
366
367                match events_rx.recv_timeout(idle_browser_timeout) {
368                    Err(recv_timeout_error) => {
369                        match recv_timeout_error {
370                            RecvTimeoutError::Timeout => {
371                                error!(
372                                    "Got a timeout while listening for browser events (Chrome #{process_id:?})"
373                                );
374                            }
375                            RecvTimeoutError::Disconnected => {
376                                debug!(
377                                    "Browser event sender disconnected while loop was waiting (Chrome #{process_id:?})"
378                                );
379                            }
380                        }
381                        break;
382                    }
383                    Ok(event) => {
384                        match event {
385                            Event::TargetCreated(ev) => {
386                                let target_info = ev.params.target_info;
387                                trace!("Creating target: {target_info:?}");
388                                // when Type == other and url == "" the next trigger would be AttachedToTarget
389                                // meaning the devtools has ben opened automatically..
390                                // for now ignoring devtools tabs to be in tabs..
391                                if target_info.Type == "page" {
392                                    match Tab::new(target_info, Arc::clone(&transport)) {
393                                        Ok(new_tab) => {
394                                            tabs.lock().unwrap().push(Arc::new(new_tab));
395                                        }
396                                        Err(_tab_creation_err) => {
397                                            info!("Failed to create a handle to new tab");
398                                            break;
399                                        }
400                                    }
401                                }
402                            }
403                            Event::TargetInfoChanged(ev) => {
404                                let target_info = &ev.params.target_info;
405                                trace!("Target info changed: {target_info:?}");
406                                if target_info.Type == "page"
407                                    && !target_info.url.starts_with("devtools://")
408                                {
409                                    let locked_tabs = tabs.lock().unwrap();
410                                    if let Some(updated_tab) = locked_tabs
411                                        .iter()
412                                        .find(|tab| *tab.get_target_id() == target_info.target_id)
413                                    {
414                                        updated_tab.update_target_info(target_info.clone());
415                                    } else {
416                                        let raw_event = format!("{ev:?}");
417                                        trace!(
418                                            "Target info changed unhandled event: {}",
419                                            raw_event.chars().take(50).collect::<String>()
420                                        );
421                                    }
422                                }
423                            }
424                            Event::AttachedToTarget(ev) => {
425                                let target_info = ev.params.target_info;
426                                trace!("Attached To Target : {target_info:?}");
427                                // can be useful when knowing if there is a devtools tab open and
428                                // to which tab it is connected (parent)
429                            }
430                            Event::TargetDestroyed(ev) => {
431                                trace!("Target destroyed: {:?}", ev.params.target_id);
432                                let mut locked_tabs = tabs.lock().unwrap();
433                                let pos = locked_tabs
434                                    .iter()
435                                    .position(|tab| *tab.get_target_id() == ev.params.target_id);
436
437                                if let Some(idx) = pos {
438                                    locked_tabs.remove(idx);
439                                }
440                            }
441                            _ => {
442                                let raw_event = format!("{event:?}");
443                                trace!(
444                                    "Unhandled event: {}",
445                                    raw_event.chars().take(50).collect::<String>()
446                                );
447                            }
448                        }
449                    }
450                }
451            }
452            info!("Finished browser's event handling loop");
453        });
454    }
455
    /// Call a browser-level protocol method and return its result object.
    ///
    /// This forwards `method` to the transport's browser-level call. See the
    /// `crate::protocol::cdp` module for the available method types.
    ///
    /// # Errors
    ///
    /// Returns whatever error the transport reports for the call.
    fn call_method<C>(&self, method: C) -> Result<C::ReturnObject>
    where
        C: Method + serde::Serialize,
    {
        self.inner.transport.call_method_on_browser(method)
    }
465
    /// Test-only accessor for the Chrome process owned by this browser, if any.
    ///
    /// Returns `None` when the browser holds no process.
    #[allow(dead_code)]
    #[cfg(test)]
    pub(crate) fn process(&self) -> Option<&Process> {
        #[allow(clippy::used_underscore_binding)]
        self.inner.process.as_ref()
    }
472}
473
474/// [`Browser`] is being dropped!
475/// Dropping the inner browser means that there are no more references in the `Arc` inside [`Browser`].
476impl Drop for BrowserInner {
477    fn drop(&mut self) {
478        info!("Dropping browser");
479        if self.close_on_drop {
480            self.transport
481                .call_method_on_browser(cdp::Browser::Close(None))
482                .ok();
483        }
484        self.loop_shutdown_tx.send(()).ok();
485        self.transport.shutdown();
486    }
487}
488
489/// Returns the path to Chrome's executable.
490///
491/// If the `CHROME` environment variable is set, `default_executable` will
492/// use it as the default path. Otherwise, the filenames `google-chrome-stable`
493/// `chromium`, `chromium-browser`, `chrome` and `chrome-browser` are
494/// searched for in standard places. If that fails,
495/// `/Applications/Google Chrome.app/...` (on MacOS) or the registry (on Windows)
496/// is consulted. If all of the above fail, an error is returned.
497pub fn default_executable() -> Result<std::path::PathBuf, String> {
498    if let Ok(path) = std::env::var("CHROME") {
499        if std::path::Path::new(&path).exists() {
500            return Ok(path.into());
501        }
502    }
503
504    for app in &[
505        "google-chrome-stable",
506        "google-chrome-beta",
507        "google-chrome-dev",
508        "google-chrome-unstable",
509        "chromium",
510        "chromium-browser",
511        "microsoft-edge-stable",
512        "microsoft-edge-beta",
513        "microsoft-edge-dev",
514        "chrome",
515        "chrome-browser",
516        "msedge",
517        "microsoft-edge",
518    ] {
519        if let Ok(path) = which(app) {
520            return Ok(path);
521        }
522    }
523
524    #[cfg(target_os = "macos")]
525    {
526        for path in &[
527            "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
528            "/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta",
529            "/Applications/Google Chrome Dev.app/Contents/MacOS/Google Chrome Dev",
530            "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
531            "/Applications/Chromium.app/Contents/MacOS/Chromium",
532            "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
533            "/Applications/Microsoft Edge Beta.app/Contents/MacOS/Microsoft Edge Beta",
534            "/Applications/Microsoft Edge Dev.app/Contents/MacOS/Microsoft Edge Dev",
535            "/Applications/Microsoft Edge Canary.app/Contents/MacOS/Microsoft Edge Canary",
536        ][..]
537        {
538            if std::path::Path::new(path).exists() {
539                return Ok(path.into());
540            }
541        }
542    }
543
544    #[cfg(windows)]
545    {
546        use crate::browser::process::get_chrome_path_from_registry;
547
548        if let Some(path) = get_chrome_path_from_registry() {
549            if path.exists() {
550                return Ok(path);
551            }
552        }
553
554        for path in &[r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe"][..] {
555            if std::path::Path::new(path).exists() {
556                return Ok(path.into());
557            }
558        }
559    }
560
561    Err("Could not auto detect a chrome executable".to_string())
562}
563
#[cfg(test)]
mod test {
    use super::Browser;

    /// Compile-time probe: instantiating this only type-checks when `T` is `Sync`.
    fn is_sync<T: Sync>() {}

    /// Guards against a change that would make `Browser` unusable across threads.
    #[test]
    fn test_if_browser_is_sync() {
        is_sync::<Browser>();
    }
}
578}