chromiumoxide/
browser.rs

1use hashbrown::HashMap;
2use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE};
3use std::future::Future;
4use std::time::Duration;
5use std::{
6    io,
7    path::{Path, PathBuf},
8};
9
10use futures::channel::mpsc::{channel, unbounded, Sender};
11use futures::channel::oneshot::channel as oneshot_channel;
12use futures::select;
13use futures::SinkExt;
14
15use crate::async_process::{self, Child, ExitStatus, Stdio};
16use crate::cmd::{to_command_response, CommandMessage};
17use crate::conn::Connection;
18use crate::detection::{self, DetectionOptions};
19use crate::error::{BrowserStderr, CdpError, Result};
20use crate::handler::browser::BrowserContext;
21use crate::handler::viewport::Viewport;
22use crate::handler::{Handler, HandlerConfig, HandlerMessage, REQUEST_TIMEOUT};
23use crate::listeners::{EventListenerRequest, EventStream};
24use crate::page::Page;
25use crate::utils;
26use chromiumoxide_cdp::cdp::browser_protocol::browser::{
27    BrowserContextId, CloseReturns, GetVersionParams, GetVersionReturns,
28};
29use chromiumoxide_cdp::cdp::browser_protocol::network::{Cookie, CookieParam};
30use chromiumoxide_cdp::cdp::browser_protocol::storage::{
31    ClearCookiesParams, GetCookiesParams, SetCookiesParams,
32};
33use chromiumoxide_cdp::cdp::browser_protocol::target::{
34    CreateBrowserContextParams, CreateTargetParams, DisposeBrowserContextParams,
35    GetBrowserContextsParams, GetBrowserContextsReturns, TargetId, TargetInfo,
36};
37use chromiumoxide_cdp::cdp::{CdpEventMessage, IntoEventKind};
38use chromiumoxide_types::*;
39use spider_network_blocker::intercept_manager::NetworkInterceptManager;
40
/// Default timeout for `Browser::launch`, expressed in milliseconds (20 seconds).
pub const LAUNCH_TIMEOUT: u64 = 20 * 1_000;
43
44lazy_static::lazy_static! {
45    /// The request client to get the web socket url.
46    static ref REQUEST_CLIENT: reqwest::Client = reqwest::Client::builder()
47        .timeout(Duration::from_secs(60))
48        .default_headers({
49            let mut m = HeaderMap::new();
50
51            m.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
52
53            m
54        })
55        .tcp_keepalive(Some(Duration::from_secs(5)))
56        .pool_idle_timeout(Some(Duration::from_secs(60)))
57        .pool_max_idle_per_host(10)
58        .build()
59        .expect("client to build");
60}
61
62/// A [`Browser`] is created when chromiumoxide connects to a Chromium instance.
63#[derive(Debug)]
64pub struct Browser {
65    /// The `Sender` to send messages to the connection handler that drives the
66    /// websocket
67    pub(crate) sender: Sender<HandlerMessage>,
68    /// How the spawned chromium instance was configured, if any
69    config: Option<BrowserConfig>,
70    /// The spawned chromium instance
71    child: Option<Child>,
72    /// The debug web socket url of the chromium instance
73    debug_ws_url: String,
74    /// The context of the browser
75    pub browser_context: BrowserContext,
76}
77
78/// Browser connection information.
79#[derive(serde::Deserialize, Debug, Default)]
80pub struct BrowserConnection {
81    #[serde(rename = "Browser")]
82    /// The browser name
83    pub browser: String,
84    #[serde(rename = "Protocol-Version")]
85    /// Browser version
86    pub protocol_version: String,
87    #[serde(rename = "User-Agent")]
88    /// User Agent used by default.
89    pub user_agent: String,
90    #[serde(rename = "V8-Version")]
91    /// The v8 engine version
92    pub v8_version: String,
93    #[serde(rename = "WebKit-Version")]
94    /// Webkit version
95    pub webkit_version: String,
96    #[serde(rename = "webSocketDebuggerUrl")]
97    /// Remote debugging address
98    pub web_socket_debugger_url: String,
99}
100
101impl Browser {
102    /// Connect to an already running chromium instance via the given URL.
103    ///
104    /// If the URL is a http(s) URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
105    pub async fn connect(url: impl Into<String>) -> Result<(Self, Handler)> {
106        Self::connect_with_config(url, HandlerConfig::default()).await
107    }
108
109    // Connect to an already running chromium instance with a given `HandlerConfig`.
110    ///
111    /// If the URL is a http URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
112    pub async fn connect_with_config(
113        url: impl Into<String>,
114        config: HandlerConfig,
115    ) -> Result<(Self, Handler)> {
116        let mut debug_ws_url = url.into();
117
118        if debug_ws_url.starts_with("http") {
119            match REQUEST_CLIENT
120                .get(
121                    if debug_ws_url.ends_with("/json/version")
122                        || debug_ws_url.ends_with("/json/version/")
123                    {
124                        debug_ws_url.to_owned()
125                    } else {
126                        format!(
127                            "{}{}json/version",
128                            &debug_ws_url,
129                            if debug_ws_url.ends_with('/') { "" } else { "/" }
130                        )
131                    },
132                )
133                .send()
134                .await
135            {
136                Ok(req) => {
137                    let connection: BrowserConnection =
138                        crate::serde_json::from_slice(&req.bytes().await.unwrap_or_default())
139                            .unwrap_or_default();
140                    if !connection.web_socket_debugger_url.is_empty() {
141                        debug_ws_url = connection.web_socket_debugger_url;
142                    }
143                }
144                Err(_) => return Err(CdpError::NoResponse),
145            }
146        }
147
148        let conn = Connection::<CdpEventMessage>::connect(&debug_ws_url).await?;
149
150        let (tx, rx) = channel(1000);
151
152        let handler_config = BrowserConfig {
153            ignore_https_errors: config.ignore_https_errors,
154            viewport: config.viewport.clone(),
155            request_timeout: config.request_timeout,
156            request_intercept: config.request_intercept,
157            cache_enabled: config.cache_enabled,
158            ignore_visuals: config.ignore_visuals,
159            ignore_stylesheets: config.ignore_stylesheets,
160            ignore_javascript: config.ignore_javascript,
161            ignore_analytics: config.ignore_analytics,
162            ignore_ads: config.ignore_ads,
163            extra_headers: config.extra_headers.clone(),
164            only_html: config.only_html,
165            service_worker_enabled: config.service_worker_enabled,
166            intercept_manager: config.intercept_manager,
167            ..Default::default()
168        };
169
170        let fut = Handler::new(conn, rx, config);
171        let browser_context = fut.default_browser_context().clone();
172
173        let browser = Self {
174            sender: tx,
175            config: Some(handler_config),
176            child: None,
177            debug_ws_url,
178            browser_context,
179        };
180
181        Ok((browser, fut))
182    }
183
184    /// Launches a new instance of `chromium` in the background and attaches to
185    /// its debug web socket.
186    ///
187    /// This fails when no chromium executable could be detected.
188    ///
189    /// This fails if no web socket url could be detected from the child
190    /// processes stderr for more than the configured `launch_timeout`
191    /// (20 seconds by default).
192    pub async fn launch(mut config: BrowserConfig) -> Result<(Self, Handler)> {
193        // Canonalize paths to reduce issues with sandboxing
194        config.executable = utils::canonicalize_except_snap(config.executable).await?;
195
196        // Launch a new chromium instance
197        let mut child = config.launch()?;
198
199        /// Faillible initialization to run once the child process is created.
200        ///
201        /// All faillible calls must be executed inside this function. This ensures that all
202        /// errors are caught and that the child process is properly cleaned-up.
203        async fn with_child(
204            config: &BrowserConfig,
205            child: &mut Child,
206        ) -> Result<(String, Connection<CdpEventMessage>)> {
207            let dur = config.launch_timeout;
208            let timeout_fut = Box::pin(tokio::time::sleep(dur));
209
210            // extract the ws:
211            let debug_ws_url = ws_url_from_output(child, timeout_fut).await?;
212            let conn = Connection::<CdpEventMessage>::connect(&debug_ws_url).await?;
213            Ok((debug_ws_url, conn))
214        }
215
216        let (debug_ws_url, conn) = match with_child(&config, &mut child).await {
217            Ok(conn) => conn,
218            Err(e) => {
219                // An initialization error occurred, clean up the process
220                if let Ok(Some(_)) = child.try_wait() {
221                    // already exited, do nothing, may happen if the browser crashed
222                } else {
223                    // the process is still alive, kill it and wait for exit (avoid zombie processes)
224                    child.kill().await.expect("`Browser::launch` failed but could not clean-up the child process (`kill`)");
225                    child.wait().await.expect("`Browser::launch` failed but could not clean-up the child process (`wait`)");
226                }
227                return Err(e);
228            }
229        };
230
231        // Only infaillible calls are allowed after this point to avoid clean-up issues with the
232        // child process.
233
234        let (tx, rx) = channel(1000);
235
236        let handler_config = HandlerConfig {
237            ignore_https_errors: config.ignore_https_errors,
238            viewport: config.viewport.clone(),
239            context_ids: Vec::new(),
240            request_timeout: config.request_timeout,
241            request_intercept: config.request_intercept,
242            cache_enabled: config.cache_enabled,
243            ignore_visuals: config.ignore_visuals,
244            ignore_stylesheets: config.ignore_stylesheets,
245            ignore_javascript: config.ignore_javascript,
246            ignore_analytics: config.ignore_analytics,
247            ignore_ads: config.ignore_ads,
248            extra_headers: config.extra_headers.clone(),
249            only_html: config.only_html,
250            service_worker_enabled: config.service_worker_enabled,
251            created_first_target: false,
252            intercept_manager: config.intercept_manager,
253        };
254
255        let fut = Handler::new(conn, rx, handler_config);
256        let browser_context = fut.default_browser_context().clone();
257
258        let browser = Self {
259            sender: tx,
260            config: Some(config),
261            child: Some(child),
262            debug_ws_url,
263            browser_context,
264        };
265
266        Ok((browser, fut))
267    }
268
269    /// Request to fetch all existing browser targets.
270    ///
271    /// By default, only targets launched after the browser connection are tracked
272    /// when connecting to a existing browser instance with the devtools websocket url
273    /// This function fetches existing targets on the browser and adds them as pages internally
274    ///
275    /// The pages are not guaranteed to be ready as soon as the function returns
276    /// You should wait a few millis if you need to use a page
277    /// Returns [TargetInfo]
278    pub async fn fetch_targets(&mut self) -> Result<Vec<TargetInfo>> {
279        let (tx, rx) = oneshot_channel();
280
281        self.sender
282            .clone()
283            .send(HandlerMessage::FetchTargets(tx))
284            .await?;
285
286        rx.await?
287    }
288
289    /// Request for the browser to close completely.
290    ///
291    /// If the browser was spawned by [`Browser::launch`], it is recommended to wait for the
292    /// spawned instance exit, to avoid "zombie" processes ([`Browser::wait`],
293    /// [`Browser::wait_sync`], [`Browser::try_wait`]).
294    /// [`Browser::drop`] waits automatically if needed.
295    pub async fn close(&self) -> Result<CloseReturns> {
296        let (tx, rx) = oneshot_channel();
297
298        self.sender
299            .clone()
300            .send(HandlerMessage::CloseBrowser(tx))
301            .await?;
302
303        rx.await?
304    }
305
306    /// Asynchronously wait for the spawned chromium instance to exit completely.
307    ///
308    /// The instance is spawned by [`Browser::launch`]. `wait` is usually called after
309    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
310    /// "zombie" processes.
311    ///
312    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
313    /// connected to an existing browser through [`Browser::connect`])
314    pub async fn wait(&mut self) -> io::Result<Option<ExitStatus>> {
315        if let Some(child) = self.child.as_mut() {
316            Ok(Some(child.wait().await?))
317        } else {
318            Ok(None)
319        }
320    }
321
322    /// If the spawned chromium instance has completely exited, wait for it.
323    ///
324    /// The instance is spawned by [`Browser::launch`]. `try_wait` is usually called after
325    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
326    /// "zombie" processes.
327    ///
328    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
329    /// connected to an existing browser through [`Browser::connect`])
330    pub fn try_wait(&mut self) -> io::Result<Option<ExitStatus>> {
331        if let Some(child) = self.child.as_mut() {
332            child.try_wait()
333        } else {
334            Ok(None)
335        }
336    }
337
338    /// Get the spawned chromium instance
339    ///
340    /// The instance is spawned by [`Browser::launch`]. The result is a [`async_process::Child`]
341    /// value. It acts as a compat wrapper for an `async-std` or `tokio` child process.
342    ///
343    /// You may use [`async_process::Child::as_mut_inner`] to retrieve the concrete implementation
344    /// for the selected runtime.
345    ///
346    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
347    /// connected to an existing browser through [`Browser::connect`])
348    pub fn get_mut_child(&mut self) -> Option<&mut Child> {
349        self.child.as_mut()
350    }
351
352    /// Has a browser instance launched on system.
353    pub fn has_child(&self) -> bool {
354        self.child.is_some()
355    }
356
357    /// Forcibly kill the spawned chromium instance
358    ///
359    /// The instance is spawned by [`Browser::launch`]. `kill` will automatically wait for the child
360    /// process to exit to avoid "zombie" processes.
361    ///
362    /// This method is provided to help if the browser does not close by itself. You should prefer
363    /// to use [`Browser::close`].
364    ///
365    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
366    /// connected to an existing browser through [`Browser::connect`])
367    pub async fn kill(&mut self) -> Option<io::Result<()>> {
368        match self.child.as_mut() {
369            Some(child) => Some(child.kill().await),
370            None => None,
371        }
372    }
373
374    /// If not launched as incognito this creates a new incognito browser
375    /// context. After that this browser exists within the incognito session.
376    /// New pages created while being in incognito mode will also run in the
377    /// incognito context. Incognito contexts won't share cookies/cache with
378    /// other browser contexts.
379    pub async fn start_incognito_context(&mut self) -> Result<&mut Self> {
380        if !self.is_incognito_configured() {
381            let browser_context_id = self
382                .create_browser_context(CreateBrowserContextParams::default())
383                .await?;
384            self.browser_context = BrowserContext::from(browser_context_id);
385            self.sender
386                .clone()
387                .send(HandlerMessage::InsertContext(self.browser_context.clone()))
388                .await?;
389        }
390
391        Ok(self)
392    }
393
394    /// If a incognito session was created with
395    /// `Browser::start_incognito_context` this disposes this context.
396    ///
397    /// # Note This will also dispose all pages that were running within the
398    /// incognito context.
399    pub async fn quit_incognito_context_base(
400        &self,
401        browser_context_id: BrowserContextId,
402    ) -> Result<&Self> {
403        self.dispose_browser_context(browser_context_id.clone())
404            .await?;
405        self.sender
406            .clone()
407            .send(HandlerMessage::DisposeContext(BrowserContext::from(
408                browser_context_id,
409            )))
410            .await?;
411        Ok(self)
412    }
413
414    /// If a incognito session was created with
415    /// `Browser::start_incognito_context` this disposes this context.
416    ///
417    /// # Note This will also dispose all pages that were running within the
418    /// incognito context.
419    pub async fn quit_incognito_context(&mut self) -> Result<&mut Self> {
420        if let Some(id) = self.browser_context.take() {
421            let _ = self.quit_incognito_context_base(id).await;
422        }
423        Ok(self)
424    }
425
426    /// Whether incognito mode was configured from the start
427    fn is_incognito_configured(&self) -> bool {
428        self.config
429            .as_ref()
430            .map(|c| c.incognito)
431            .unwrap_or_default()
432    }
433
434    /// Returns the address of the websocket this browser is attached to
435    pub fn websocket_address(&self) -> &String {
436        &self.debug_ws_url
437    }
438
439    /// Whether the BrowserContext is incognito.
440    pub fn is_incognito(&self) -> bool {
441        self.is_incognito_configured() || self.browser_context.is_incognito()
442    }
443
444    /// The config of the spawned chromium instance if any.
445    pub fn config(&self) -> Option<&BrowserConfig> {
446        self.config.as_ref()
447    }
448
449    /// Create a new browser page
450    pub async fn new_page(&self, params: impl Into<CreateTargetParams>) -> Result<Page> {
451        let (tx, rx) = oneshot_channel();
452        let mut params = params.into();
453
454        if let Some(id) = self.browser_context.id() {
455            if params.browser_context_id.is_none() {
456                params.browser_context_id = Some(id.clone());
457            }
458        }
459
460        let _ = self
461            .sender
462            .clone()
463            .send(HandlerMessage::CreatePage(params, tx))
464            .await;
465
466        rx.await?
467    }
468
469    /// Version information about the browser
470    pub async fn version(&self) -> Result<GetVersionReturns> {
471        Ok(self.execute(GetVersionParams::default()).await?.result)
472    }
473
474    /// Returns the user agent of the browser
475    pub async fn user_agent(&self) -> Result<String> {
476        Ok(self.version().await?.user_agent)
477    }
478
479    /// Call a browser method.
480    pub async fn execute<T: Command>(&self, cmd: T) -> Result<CommandResponse<T::Response>> {
481        let (tx, rx) = oneshot_channel();
482        let method = cmd.identifier();
483        let msg = CommandMessage::new(cmd, tx)?;
484
485        self.sender
486            .clone()
487            .send(HandlerMessage::Command(msg))
488            .await?;
489        let resp = rx.await??;
490        to_command_response::<T>(resp, method)
491    }
492
493    /// Return all of the pages of the browser
494    pub async fn pages(&self) -> Result<Vec<Page>> {
495        let (tx, rx) = oneshot_channel();
496        self.sender
497            .clone()
498            .send(HandlerMessage::GetPages(tx))
499            .await?;
500        Ok(rx.await?)
501    }
502
503    /// Return page of given target_id
504    pub async fn get_page(&self, target_id: TargetId) -> Result<Page> {
505        let (tx, rx) = oneshot_channel();
506        self.sender
507            .clone()
508            .send(HandlerMessage::GetPage(target_id, tx))
509            .await?;
510        rx.await?.ok_or(CdpError::NotFound)
511    }
512
513    /// Set listener for browser event
514    pub async fn event_listener<T: IntoEventKind>(&self) -> Result<EventStream<T>> {
515        let (tx, rx) = unbounded();
516        self.sender
517            .clone()
518            .send(HandlerMessage::AddEventListener(
519                EventListenerRequest::new::<T>(tx),
520            ))
521            .await?;
522
523        Ok(EventStream::new(rx))
524    }
525
526    /// Creates a new empty browser context.
527    pub async fn create_browser_context(
528        &mut self,
529        params: CreateBrowserContextParams,
530    ) -> Result<BrowserContextId> {
531        let response = self.execute(params).await?;
532        Ok(response.result.browser_context_id)
533    }
534
535    /// Returns all browser contexts created with Target.createBrowserContext method.
536    pub async fn get_browser_contexts(
537        &mut self,
538        params: GetBrowserContextsParams,
539    ) -> Result<GetBrowserContextsReturns> {
540        let response = self.execute(params).await?;
541        Ok(response.result)
542    }
543
544    /// Send a new empty browser context.
545    pub async fn send_new_context(&mut self, browser_context_id: BrowserContextId) -> Result<()> {
546        self.browser_context = BrowserContext::from(browser_context_id);
547        self.sender
548            .clone()
549            .send(HandlerMessage::InsertContext(self.browser_context.clone()))
550            .await?;
551        Ok(())
552    }
553
554    /// Deletes a browser context.
555    pub async fn dispose_browser_context(
556        &self,
557        browser_context_id: impl Into<BrowserContextId>,
558    ) -> Result<()> {
559        self.execute(DisposeBrowserContextParams::new(browser_context_id))
560            .await?;
561
562        Ok(())
563    }
564
565    /// Clears cookies.
566    pub async fn clear_cookies(&self) -> Result<()> {
567        self.execute(ClearCookiesParams::default()).await?;
568        Ok(())
569    }
570
571    /// Returns all browser cookies.
572    pub async fn get_cookies(&self) -> Result<Vec<Cookie>> {
573        let mut cmd = GetCookiesParams::default();
574
575        cmd.browser_context_id = self.browser_context.id.clone();
576
577        Ok(self.execute(cmd).await?.result.cookies)
578    }
579
580    /// Sets given cookies.
581    pub async fn set_cookies(&self, mut cookies: Vec<CookieParam>) -> Result<&Self> {
582        for cookie in &mut cookies {
583            if let Some(url) = cookie.url.as_ref() {
584                crate::page::validate_cookie_url(url)?;
585            }
586        }
587
588        let mut cookies_param = SetCookiesParams::new(cookies);
589
590        cookies_param.browser_context_id = self.browser_context.id.clone();
591
592        self.execute(cookies_param).await?;
593        Ok(self)
594    }
595}
596
597impl Drop for Browser {
598    fn drop(&mut self) {
599        if let Some(child) = self.child.as_mut() {
600            if let Ok(Some(_)) = child.try_wait() {
601                // Already exited, do nothing. Usually occurs after using the method close or kill.
602            } else {
603                // We set the `kill_on_drop` property for the child process, so no need to explicitely
604                // kill it here. It can't really be done anyway since the method is async.
605                //
606                // On Unix, the process will be reaped in the background by the runtime automatically
607                // so it won't leave any resources locked. It is, however, a better practice for the user to
608                // do it himself since the runtime doesn't provide garantees as to when the reap occurs, so we
609                // warn him here.
610                tracing::warn!("Browser was not closed manually, it will be killed automatically in the background");
611            }
612        }
613    }
614}
615
616/// Resolve devtools WebSocket URL from the provided browser process
617///
618/// If an error occurs, it returns the browser's stderr output.
619///
620/// The URL resolution fails if:
621/// - [`CdpError::LaunchTimeout`]: `timeout_fut` completes, this corresponds to a timeout
622/// - [`CdpError::LaunchExit`]: the browser process exits (or is killed)
623/// - [`CdpError::LaunchIo`]: an input/output error occurs when await the process exit or reading
624///   the browser's stderr: end of stream, invalid UTF-8, other
625async fn ws_url_from_output(
626    child_process: &mut Child,
627    timeout_fut: impl Future<Output = ()> + Unpin,
628) -> Result<String> {
629    use futures::{AsyncBufReadExt, FutureExt};
630    let mut timeout_fut = timeout_fut.fuse();
631    let stderr = child_process.stderr.take().expect("no stderror");
632    let mut stderr_bytes = Vec::<u8>::new();
633    let mut exit_status_fut = Box::pin(child_process.wait()).fuse();
634    let mut buf = futures::io::BufReader::new(stderr);
635    loop {
636        select! {
637            _ = timeout_fut => return Err(CdpError::LaunchTimeout(BrowserStderr::new(stderr_bytes))),
638            exit_status = exit_status_fut => {
639                return Err(match exit_status {
640                    Err(e) => CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)),
641                    Ok(exit_status) => CdpError::LaunchExit(exit_status, BrowserStderr::new(stderr_bytes)),
642                })
643            },
644            read_res = buf.read_until(b'\n', &mut stderr_bytes).fuse() => {
645                match read_res {
646                    Err(e) => return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes))),
647                    Ok(byte_count) => {
648                        if byte_count == 0 {
649                            let e = io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected end of stream");
650                            return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
651                        }
652                        let start_offset = stderr_bytes.len() - byte_count;
653                        let new_bytes = &stderr_bytes[start_offset..];
654                        match std::str::from_utf8(new_bytes) {
655                            Err(_) => {
656                                let e = io::Error::new(io::ErrorKind::InvalidData, "stream did not contain valid UTF-8");
657                                return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
658                            }
659                            Ok(line) => {
660                                if let Some((_, ws)) = line.rsplit_once("listening on ") {
661                                    if ws.starts_with("ws") && ws.contains("devtools/browser") {
662                                        return Ok(ws.trim().to_string());
663                                    }
664                                }
665                            }
666                        }
667                    }
668                }
669            }
670        }
671    }
672}
673
/// The headless mode the browser is run with.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum HeadlessMode {
    /// Not headless: the "headful" mode.
    False,
    /// The old headless mode. This is the default variant.
    #[default]
    True,
    /// The new headless mode. See also: https://developer.chrome.com/docs/chromium/new-headless
    New,
}
684
685#[derive(Debug, Clone, Default)]
686pub struct BrowserConfig {
687    /// Determines whether to run headless version of the browser. Defaults to
688    /// true.
689    headless: HeadlessMode,
690    /// Determines whether to run the browser with a sandbox.
691    sandbox: bool,
692    /// Launch the browser with a specific window width and height.
693    window_size: Option<(u32, u32)>,
694    /// Launch the browser with a specific debugging port.
695    port: u16,
696    /// Path for Chrome or Chromium.
697    ///
698    /// If unspecified, the create will try to automatically detect a suitable
699    /// binary.
700    executable: std::path::PathBuf,
701
702    /// A list of Chrome extensions to load.
703    ///
704    /// An extension should be a path to a folder containing the extension code.
705    /// CRX files cannot be used directly and must be first extracted.
706    ///
707    /// Note that Chrome does not support loading extensions in headless-mode.
708    /// See https://bugs.chromium.org/p/chromium/issues/detail?id=706008#c5
709    extensions: Vec<String>,
710
711    /// Environment variables to set for the Chromium process.
712    /// Passes value through to std::process::Command::envs.
713    pub process_envs: Option<HashMap<String, String>>,
714
715    /// Data dir for user data
716    pub user_data_dir: Option<PathBuf>,
717
718    /// Whether to launch the `Browser` in incognito mode
719    incognito: bool,
720
721    /// Timeout duration for `Browser::launch`.
722    launch_timeout: Duration,
723
724    /// Ignore https errors, default is true
725    ignore_https_errors: bool,
726    pub viewport: Option<Viewport>,
727    /// The duration after a request with no response should time out
728    request_timeout: Duration,
729
730    /// Additional command line arguments to pass to the browser instance.
731    args: Vec<String>,
732
733    /// Whether to disable DEFAULT_ARGS or not, default is false
734    disable_default_args: bool,
735
736    /// Whether to enable request interception
737    pub request_intercept: bool,
738
739    /// Whether to enable cache.
740    pub cache_enabled: bool,
741    /// Whether to enable/disable service workers.
742    pub service_worker_enabled: bool,
743
744    /// Whether to ignore visuals when request interception is enabled.
745    pub ignore_visuals: bool,
746    /// Whether to ignore stylesheets when request interception is enabled.
747    pub ignore_stylesheets: bool,
748    /// Whether to ignore javascript when request interception is enabled. This will allow framework JS like react to go through.
749    pub ignore_javascript: bool,
750    /// Whether to ignore analytics when request interception is enabled.
751    pub ignore_analytics: bool,
752    /// Whether to ignore ads when request interception is enabled.
753    pub ignore_ads: bool,
754    /// Extra headers.
755    pub extra_headers: Option<std::collections::HashMap<String, String>>,
756    /// Only html
757    pub only_html: bool,
758    /// The interception intercept manager.
759    pub intercept_manager: NetworkInterceptManager,
760}
761
/// Builder for [`BrowserConfig`].
///
/// Created through [`BrowserConfig::builder`] (which uses the `Default`
/// impl), adjusted with the chained setter methods, and converted into a
/// [`BrowserConfig`] by `build`. Every field is copied verbatim into the
/// field of the same name on `BrowserConfig`, except `executable` /
/// `executation_detection`, which `build` resolves into a single path.
#[derive(Debug, Clone)]
pub struct BrowserConfigBuilder {
    /// Headless mode the browser is launched with.
    headless: HeadlessMode,
    /// Whether the sandbox stays enabled; `no_sandbox` clears it.
    sandbox: bool,
    /// Optional `(width, height)` of the browser window.
    window_size: Option<(u32, u32)>,
    /// Remote debugging port handed to the browser on launch.
    port: u16,
    /// Explicit path to the browser executable; when `None`, `build` falls
    /// back to `detection::default_executable`.
    executable: Option<PathBuf>,
    /// Options given to `detection::default_executable` when `executable` is
    /// `None`.
    // NOTE(review): the name looks like a typo for `executable_detection`.
    // The field is private, but a rename has to touch the `Default` impl,
    // `chrome_detection` and `build` together.
    executation_detection: DetectionOptions,
    /// Extensions to load, one entry per extension.
    extensions: Vec<String>,
    /// Environment variables for the spawned browser process. This is the
    /// `hashbrown` map imported at the top of the file, unlike
    /// `extra_headers`, which uses the standard-library map.
    process_envs: Option<HashMap<String, String>>,
    /// User data directory; when `None`, `launch` substitutes a
    /// `chromiumoxide-runner` directory inside the system temp dir.
    user_data_dir: Option<PathBuf>,
    /// Whether to launch the browser with `--incognito`.
    incognito: bool,
    /// Launch timeout; defaults to `LAUNCH_TIMEOUT` milliseconds.
    launch_timeout: Duration,
    /// Stored as `BrowserConfig::ignore_https_errors`; defaults to `true`
    /// and is cleared by `respect_https_errors`.
    ignore_https_errors: bool,
    /// Viewport to emulate; `None` disables viewport emulation.
    viewport: Option<Viewport>,
    /// Request timeout; defaults to `REQUEST_TIMEOUT` milliseconds.
    request_timeout: Duration,
    /// Additional command-line arguments for the browser.
    args: Vec<String>,
    /// Whether to skip `DEFAULT_ARGS` when launching.
    disable_default_args: bool,
    /// Whether to enable request interception.
    request_intercept: bool,
    /// Whether to enable cache.
    cache_enabled: bool,
    /// Whether to enable/disable service workers.
    service_worker_enabled: bool,
    // The `ignore_*`, `only_html` and `intercept_manager` fields below have
    // no setter in the part of the file visible here; they keep their
    // `Default` values unless a setter exists elsewhere.
    /// Whether to ignore visuals when request interception is enabled.
    ignore_visuals: bool,
    /// Whether to ignore ads when request interception is enabled.
    ignore_ads: bool,
    /// Whether to ignore javascript when request interception is enabled.
    ignore_javascript: bool,
    /// Whether to ignore stylesheets when request interception is enabled.
    ignore_stylesheets: bool,
    /// Whether to ignore analytics when request interception is enabled.
    ignore_analytics: bool,
    /// Only html.
    only_html: bool,
    /// Extra headers.
    extra_headers: Option<std::collections::HashMap<String, String>>,
    /// The network intercept manager.
    intercept_manager: NetworkInterceptManager,
}
792
793impl BrowserConfig {
794    pub fn builder() -> BrowserConfigBuilder {
795        BrowserConfigBuilder::default()
796    }
797
798    pub fn with_executable(path: impl AsRef<Path>) -> Self {
799        Self::builder()
800            .chrome_executable(path)
801            .build()
802            .expect("path to executable exist")
803    }
804}
805
806impl Default for BrowserConfigBuilder {
807    fn default() -> Self {
808        Self {
809            headless: HeadlessMode::True,
810            sandbox: true,
811            window_size: None,
812            port: 0,
813            executable: None,
814            executation_detection: DetectionOptions::default(),
815            extensions: Vec::new(),
816            process_envs: None,
817            user_data_dir: None,
818            incognito: false,
819            launch_timeout: Duration::from_millis(LAUNCH_TIMEOUT),
820            ignore_https_errors: true,
821            viewport: Some(Default::default()),
822            request_timeout: Duration::from_millis(REQUEST_TIMEOUT),
823            args: Vec::new(),
824            disable_default_args: false,
825            request_intercept: false,
826            cache_enabled: true,
827            ignore_visuals: false,
828            ignore_ads: false,
829            ignore_javascript: false,
830            ignore_analytics: false,
831            ignore_stylesheets: false,
832            only_html: false,
833            extra_headers: Default::default(),
834            service_worker_enabled: true,
835            intercept_manager: NetworkInterceptManager::Unknown,
836        }
837    }
838}
839
840impl BrowserConfigBuilder {
841    pub fn window_size(mut self, width: u32, height: u32) -> Self {
842        self.window_size = Some((width, height));
843        self
844    }
845
846    pub fn no_sandbox(mut self) -> Self {
847        self.sandbox = false;
848        self
849    }
850
851    pub fn with_head(mut self) -> Self {
852        self.headless = HeadlessMode::False;
853        self
854    }
855
856    pub fn new_headless_mode(mut self) -> Self {
857        self.headless = HeadlessMode::New;
858        self
859    }
860
861    pub fn headless_mode(mut self, mode: HeadlessMode) -> Self {
862        self.headless = mode;
863        self
864    }
865
866    pub fn incognito(mut self) -> Self {
867        self.incognito = true;
868        self
869    }
870
871    pub fn respect_https_errors(mut self) -> Self {
872        self.ignore_https_errors = false;
873        self
874    }
875
876    pub fn port(mut self, port: u16) -> Self {
877        self.port = port;
878        self
879    }
880
881    pub fn launch_timeout(mut self, timeout: Duration) -> Self {
882        self.launch_timeout = timeout;
883        self
884    }
885
886    pub fn request_timeout(mut self, timeout: Duration) -> Self {
887        self.request_timeout = timeout;
888        self
889    }
890
891    /// Configures the viewport of the browser, which defaults to `800x600`.
892    /// `None` disables viewport emulation (i.e., it uses the browsers default
893    /// configuration, which fills the available space. This is similar to what
894    /// Playwright does when you provide `null` as the value of its `viewport`
895    /// option).
896    pub fn viewport(mut self, viewport: impl Into<Option<Viewport>>) -> Self {
897        self.viewport = viewport.into();
898        self
899    }
900
901    pub fn user_data_dir(mut self, data_dir: impl AsRef<Path>) -> Self {
902        self.user_data_dir = Some(data_dir.as_ref().to_path_buf());
903        self
904    }
905
906    pub fn chrome_executable(mut self, path: impl AsRef<Path>) -> Self {
907        self.executable = Some(path.as_ref().to_path_buf());
908        self
909    }
910
911    pub fn chrome_detection(mut self, options: DetectionOptions) -> Self {
912        self.executation_detection = options;
913        self
914    }
915
916    pub fn extension(mut self, extension: impl Into<String>) -> Self {
917        self.extensions.push(extension.into());
918        self
919    }
920
921    pub fn extensions<I, S>(mut self, extensions: I) -> Self
922    where
923        I: IntoIterator<Item = S>,
924        S: Into<String>,
925    {
926        for ext in extensions {
927            self.extensions.push(ext.into());
928        }
929        self
930    }
931
932    pub fn env(mut self, key: impl Into<String>, val: impl Into<String>) -> Self {
933        self.process_envs
934            .get_or_insert(HashMap::new())
935            .insert(key.into(), val.into());
936        self
937    }
938
939    pub fn envs<I, K, V>(mut self, envs: I) -> Self
940    where
941        I: IntoIterator<Item = (K, V)>,
942        K: Into<String>,
943        V: Into<String>,
944    {
945        self.process_envs
946            .get_or_insert(HashMap::new())
947            .extend(envs.into_iter().map(|(k, v)| (k.into(), v.into())));
948        self
949    }
950
951    pub fn arg(mut self, arg: impl Into<String>) -> Self {
952        self.args.push(arg.into());
953        self
954    }
955
956    pub fn args<I, S>(mut self, args: I) -> Self
957    where
958        I: IntoIterator<Item = S>,
959        S: Into<String>,
960    {
961        for arg in args {
962            self.args.push(arg.into());
963        }
964        self
965    }
966
967    pub fn disable_default_args(mut self) -> Self {
968        self.disable_default_args = true;
969        self
970    }
971
972    pub fn enable_request_intercept(mut self) -> Self {
973        self.request_intercept = true;
974        self
975    }
976
977    pub fn disable_request_intercept(mut self) -> Self {
978        self.request_intercept = false;
979        self
980    }
981
982    pub fn enable_cache(mut self) -> Self {
983        self.cache_enabled = true;
984        self
985    }
986
987    pub fn disable_cache(mut self) -> Self {
988        self.cache_enabled = false;
989        self
990    }
991
992    pub fn set_service_worker_enabled(mut self, bypass: bool) -> Self {
993        self.service_worker_enabled = bypass;
994        self
995    }
996
997    pub fn set_extra_headers(
998        mut self,
999        headers: Option<std::collections::HashMap<String, String>>,
1000    ) -> Self {
1001        self.extra_headers = headers;
1002        self
1003    }
1004
1005    pub fn build(self) -> std::result::Result<BrowserConfig, String> {
1006        let executable = if let Some(e) = self.executable {
1007            e
1008        } else {
1009            detection::default_executable(self.executation_detection)?
1010        };
1011
1012        Ok(BrowserConfig {
1013            headless: self.headless,
1014            sandbox: self.sandbox,
1015            window_size: self.window_size,
1016            port: self.port,
1017            executable,
1018            extensions: self.extensions,
1019            process_envs: self.process_envs,
1020            user_data_dir: self.user_data_dir,
1021            incognito: self.incognito,
1022            launch_timeout: self.launch_timeout,
1023            ignore_https_errors: self.ignore_https_errors,
1024            viewport: self.viewport,
1025            request_timeout: self.request_timeout,
1026            args: self.args,
1027            disable_default_args: self.disable_default_args,
1028            request_intercept: self.request_intercept,
1029            cache_enabled: self.cache_enabled,
1030            ignore_visuals: self.ignore_visuals,
1031            ignore_ads: self.ignore_ads,
1032            ignore_javascript: self.ignore_javascript,
1033            ignore_analytics: self.ignore_analytics,
1034            ignore_stylesheets: self.ignore_stylesheets,
1035            extra_headers: self.extra_headers,
1036            only_html: self.only_html,
1037            intercept_manager: self.intercept_manager,
1038            service_worker_enabled: self.service_worker_enabled,
1039        })
1040    }
1041}
1042
1043impl BrowserConfig {
1044    pub fn launch(&self) -> io::Result<Child> {
1045        let mut cmd = async_process::Command::new(&self.executable);
1046
1047        if self.disable_default_args {
1048            cmd.args(&self.args);
1049        } else {
1050            cmd.args(DEFAULT_ARGS).args(&self.args);
1051        }
1052
1053        if !self
1054            .args
1055            .iter()
1056            .any(|arg| arg.contains("--remote-debugging-port="))
1057        {
1058            cmd.arg(format!("--remote-debugging-port={}", self.port));
1059        }
1060
1061        cmd.args(
1062            self.extensions
1063                .iter()
1064                .map(|e| format!("--load-extension={e}")),
1065        );
1066
1067        if let Some(ref user_data) = self.user_data_dir {
1068            cmd.arg(format!("--user-data-dir={}", user_data.display()));
1069        } else {
1070            // If the user did not specify a data directory, this would default to the systems default
1071            // data directory. In most cases, we would rather have a fresh instance of Chromium. Specify
1072            // a temp dir just for chromiumoxide instead.
1073            cmd.arg(format!(
1074                "--user-data-dir={}",
1075                std::env::temp_dir().join("chromiumoxide-runner").display()
1076            ));
1077        }
1078
1079        if let Some((width, height)) = self.window_size {
1080            cmd.arg(format!("--window-size={width},{height}"));
1081        }
1082
1083        if !self.sandbox {
1084            cmd.args(["--no-sandbox", "--disable-setuid-sandbox"]);
1085        }
1086
1087        match self.headless {
1088            HeadlessMode::False => (),
1089            HeadlessMode::True => {
1090                cmd.args(["--headless", "--hide-scrollbars", "--mute-audio"]);
1091            }
1092            HeadlessMode::New => {
1093                cmd.args(["--headless=new", "--hide-scrollbars", "--mute-audio"]);
1094            }
1095        }
1096
1097        if self.incognito {
1098            cmd.arg("--incognito");
1099        }
1100
1101        if let Some(ref envs) = self.process_envs {
1102            cmd.envs(envs);
1103        }
1104        cmd.stderr(Stdio::piped()).spawn()
1105    }
1106}
1107
1108/// Returns the path to Chrome's executable.
1109///
1110/// If the `CHROME` environment variable is set, `default_executable` will
1111/// use it as the default path. Otherwise, the filenames `google-chrome-stable`
1112/// `chromium`, `chromium-browser`, `chrome` and `chrome-browser` are
1113/// searched for in standard places. If that fails,
1114/// `/Applications/Google Chrome.app/...` (on MacOS) or the registry (on
1115/// Windows) is consulted. If all of the above fail, an error is returned.
1116#[deprecated(note = "Use detection::default_executable instead")]
1117pub fn default_executable() -> Result<std::path::PathBuf, String> {
1118    let options = DetectionOptions {
1119        msedge: false,
1120        unstable: false,
1121    };
1122    detection::default_executable(options)
1123}
1124
/// These are passed to the Chrome binary by default.
/// Via https://github.com/puppeteer/puppeteer/blob/4846b8723cf20d3551c0d755df394cc5e0c82a94/src/node/Launcher.ts#L157
///
/// `BrowserConfig::launch` places these ahead of the user-supplied arguments
/// unless `disable_default_args` is set. The length in the array type must be
/// kept in sync with the number of entries below.
// NOTE(review): "--disable-extensions" is always part of this list, while
// `launch` also emits "--load-extension=..." for configured extensions.
// Confirm the two switches interact as intended.
static DEFAULT_ARGS: [&str; 26] = [
    "--disable-background-networking",
    "--enable-features=NetworkService,NetworkServiceInProcess",
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-breakpad",
    "--disable-client-side-phishing-detection",
    "--disable-component-extensions-with-background-pages",
    "--disable-default-apps",
    "--disable-dev-shm-usage",
    "--disable-extensions",
    "--disable-features=TranslateUI",
    "--disable-hang-monitor",
    "--disable-ipc-flooding-protection",
    "--disable-popup-blocking",
    "--disable-prompt-on-repost",
    "--disable-renderer-backgrounding",
    "--disable-sync",
    "--force-color-profile=srgb",
    "--metrics-recording-only",
    "--no-first-run",
    "--enable-automation",
    "--password-store=basic",
    "--use-mock-keychain",
    "--enable-blink-features=IdleDetection",
    "--lang=en_US",
    "--disable-blink-features=AutomationControlled",
];