Skip to main content

chromiumoxide/
browser.rs

1use hashbrown::HashMap;
2use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE};
3use std::future::Future;
4use std::time::Duration;
5use std::{
6    io,
7    path::{Path, PathBuf},
8};
9
10use tokio::sync::mpsc::{channel, unbounded_channel, Sender};
11use tokio::sync::oneshot::channel as oneshot_channel;
12
13use crate::async_process::{self, Child, ExitStatus, Stdio};
14use crate::cmd::{to_command_response, CommandMessage};
15use crate::conn::Connection;
16use crate::detection::{self, DetectionOptions};
17use crate::error::{BrowserStderr, CdpError, Result};
18use crate::handler::browser::BrowserContext;
19use crate::handler::viewport::Viewport;
20use crate::handler::{Handler, HandlerConfig, HandlerMessage, REQUEST_TIMEOUT};
21use crate::listeners::{EventListenerRequest, EventStream};
22use crate::page::Page;
23use crate::utils;
24use chromiumoxide_cdp::cdp::browser_protocol::browser::{
25    BrowserContextId, CloseReturns, GetVersionParams, GetVersionReturns,
26};
27use chromiumoxide_cdp::cdp::browser_protocol::browser::{
28    PermissionDescriptor, PermissionSetting, SetPermissionParams,
29};
30use chromiumoxide_cdp::cdp::browser_protocol::network::{Cookie, CookieParam};
31use chromiumoxide_cdp::cdp::browser_protocol::storage::{
32    ClearCookiesParams, GetCookiesParams, SetCookiesParams,
33};
34use chromiumoxide_cdp::cdp::browser_protocol::target::{
35    CreateBrowserContextParams, CreateTargetParams, DisposeBrowserContextParams,
36    GetBrowserContextsParams, GetBrowserContextsReturns, TargetId, TargetInfo,
37};
38
39use chromiumoxide_cdp::cdp::{CdpEventMessage, IntoEventKind};
40use chromiumoxide_types::*;
41use spider_network_blocker::intercept_manager::NetworkInterceptManager;
42
/// Default timeout for [`Browser::launch`], expressed in milliseconds.
pub const LAUNCH_TIMEOUT: u64 = 20_000;
45
46lazy_static::lazy_static! {
47    /// The request client to get the web socket url.
48    static ref REQUEST_CLIENT: reqwest::Client = reqwest::Client::builder()
49        .timeout(Duration::from_secs(60))
50        .default_headers({
51            let mut m = HeaderMap::new();
52
53            m.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
54
55            m
56        })
57        .tcp_keepalive(Some(Duration::from_secs(5)))
58        .pool_idle_timeout(Some(Duration::from_secs(60)))
59        .pool_max_idle_per_host(10)
60        .build()
61        .expect("client to build");
62}
63
/// Returns chromey's global `reqwest::Client` for reuse by other subsystems
/// (e.g. remote cache uploads via `spider_remote_cache`).
///
/// The returned reference points at the lazily-initialised `REQUEST_CLIENT`
/// static, so every caller shares the same client and its settings.
pub fn request_client() -> &'static reqwest::Client {
    &REQUEST_CLIENT
}
69
/// A [`Browser`] is created when chromiumoxide connects to a Chromium instance.
///
/// It is obtained together with a [`Handler`] from [`Browser::launch`] (which
/// spawns a process) or [`Browser::connect`] / [`Browser::connect_with_config`]
/// (which attach to an existing one).
#[derive(Debug)]
pub struct Browser {
    /// The `Sender` to send messages to the connection handler that drives the
    /// websocket
    pub(crate) sender: Sender<HandlerMessage>,
    /// The configuration associated with this browser: the launch config when
    /// created by `launch`, or a config derived from the `HandlerConfig` when
    /// created by `connect_with_config`.
    config: Option<BrowserConfig>,
    /// The spawned chromium instance; `None` when attached to an existing browser
    child: Option<Child>,
    /// The debug web socket url of the chromium instance
    debug_ws_url: String,
    /// The browser context new pages are created in by default
    pub browser_context: BrowserContext,
}
85
86/// Browser connection information.
87#[derive(serde::Deserialize, Debug, Default)]
88pub struct BrowserConnection {
89    #[serde(rename = "Browser")]
90    /// The browser name
91    pub browser: String,
92    #[serde(rename = "Protocol-Version")]
93    /// Browser version
94    pub protocol_version: String,
95    #[serde(rename = "User-Agent")]
96    /// User Agent used by default.
97    pub user_agent: String,
98    #[serde(rename = "V8-Version")]
99    /// The v8 engine version
100    pub v8_version: String,
101    #[serde(rename = "WebKit-Version")]
102    /// Webkit version
103    pub webkit_version: String,
104    #[serde(rename = "webSocketDebuggerUrl")]
105    /// Remote debugging address
106    pub web_socket_debugger_url: String,
107}
108
109impl Browser {
110    /// Connect to an already running chromium instance via the given URL.
111    ///
112    /// If the URL is a http(s) URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
113    pub async fn connect(url: impl Into<String>) -> Result<(Self, Handler)> {
114        Self::connect_with_config(url, HandlerConfig::default()).await
115    }
116
117    // Connect to an already running chromium instance with a given `HandlerConfig`.
118    ///
119    /// If the URL is a http URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
120    pub async fn connect_with_config(
121        url: impl Into<String>,
122        config: HandlerConfig,
123    ) -> Result<(Self, Handler)> {
124        let mut debug_ws_url = url.into();
125        let retries = config.connection_retries;
126
127        if debug_ws_url.starts_with("http") {
128            let version_url = if debug_ws_url.ends_with("/json/version")
129                || debug_ws_url.ends_with("/json/version/")
130            {
131                debug_ws_url.to_owned()
132            } else {
133                format!(
134                    "{}{}json/version",
135                    &debug_ws_url,
136                    if debug_ws_url.ends_with('/') { "" } else { "/" }
137                )
138            };
139
140            let mut discovered = false;
141
142            for attempt in 0..=retries {
143                let retry = || async {
144                    if attempt < retries {
145                        // Cap at conn.rs MAX_BACKOFF_MS so a large
146                        // `connection_retries` can't synthesise a multi-day
147                        // sleep — `50 * 3^attempt` blows past u64 around
148                        // attempt=40 and overshoots a sane bound far earlier.
149                        let backoff_ms = 50u64
150                            .saturating_mul(3u64.saturating_pow(attempt))
151                            .min(crate::conn::MAX_BACKOFF_MS);
152                        tokio::time::sleep(Duration::from_millis(backoff_ms)).await;
153                    }
154                };
155
156                match REQUEST_CLIENT.get(&version_url).send().await {
157                    Ok(req) => match req.bytes().await {
158                        Ok(b) => {
159                            match crate::serde_json::from_slice::<Box<BrowserConnection>>(&b) {
160                                Ok(connection)
161                                    if !connection.web_socket_debugger_url.is_empty() =>
162                                {
163                                    debug_ws_url = connection.web_socket_debugger_url;
164                                    discovered = true;
165                                    break;
166                                }
167                                _ => {
168                                    // JSON parse failed or webSocketDebuggerUrl was empty — retry
169                                    retry().await;
170                                }
171                            }
172                        }
173                        Err(_) => {
174                            retry().await;
175                        }
176                    },
177                    Err(_) => {
178                        retry().await;
179                    }
180                }
181            }
182
183            if !discovered {
184                return Err(CdpError::NoResponse);
185            }
186        }
187
188        let conn =
189            Connection::<CdpEventMessage>::connect_with_retries(&debug_ws_url, retries).await?;
190
191        let (tx, rx) = channel(config.channel_capacity);
192
193        let handler_config = BrowserConfig {
194            ignore_https_errors: config.ignore_https_errors,
195            viewport: config.viewport.clone(),
196            request_timeout: config.request_timeout,
197            request_intercept: config.request_intercept,
198            cache_enabled: config.cache_enabled,
199            ignore_visuals: config.ignore_visuals,
200            ignore_stylesheets: config.ignore_stylesheets,
201            ignore_javascript: config.ignore_javascript,
202            ignore_analytics: config.ignore_analytics,
203            ignore_prefetch: config.ignore_prefetch,
204            ignore_ads: config.ignore_ads,
205            extra_headers: config.extra_headers.clone(),
206            only_html: config.only_html,
207            service_worker_enabled: config.service_worker_enabled,
208            intercept_manager: config.intercept_manager,
209            max_bytes_allowed: config.max_bytes_allowed,
210            max_redirects: config.max_redirects,
211            max_main_frame_navigations: config.max_main_frame_navigations,
212            whitelist_patterns: config.whitelist_patterns.clone(),
213            blacklist_patterns: config.blacklist_patterns.clone(),
214            ..Default::default()
215        };
216
217        let fut = Handler::new(conn, rx, config);
218        let browser_context = fut.default_browser_context().clone();
219
220        let browser = Self {
221            sender: tx,
222            config: Some(handler_config),
223            child: None,
224            debug_ws_url,
225            browser_context,
226        };
227
228        Ok((browser, fut))
229    }
230
231    /// Launches a new instance of `chromium` in the background and attaches to
232    /// its debug web socket.
233    ///
234    /// This fails when no chromium executable could be detected.
235    ///
236    /// This fails if no web socket url could be detected from the child
237    /// processes stderr for more than the configured `launch_timeout`
238    /// (20 seconds by default).
239    pub async fn launch(mut config: BrowserConfig) -> Result<(Self, Handler)> {
240        // Eagerly initialize the background cleanup worker in this
241        // runtime so that later `Drop` calls on CDP streams / temp
242        // files (from `bg_cleanup::submit`) land on a live receiver.
243        // This is a single atomic load after the first call — safe
244        // and cheap to invoke on every `launch`.
245        crate::bg_cleanup::init_worker();
246
247        // Canonalize paths to reduce issues with sandboxing
248        config.executable = utils::canonicalize_except_snap(config.executable).await?;
249
250        // Launch a new chromium instance
251        let mut child = config.launch()?;
252
253        /// Faillible initialization to run once the child process is created.
254        ///
255        /// All faillible calls must be executed inside this function. This ensures that all
256        /// errors are caught and that the child process is properly cleaned-up.
257        async fn with_child(
258            config: &BrowserConfig,
259            child: &mut Child,
260        ) -> Result<(String, Connection<CdpEventMessage>)> {
261            let dur = config.launch_timeout;
262            let timeout_fut = Box::pin(tokio::time::sleep(dur));
263
264            // extract the ws:
265            let debug_ws_url = ws_url_from_output(child, timeout_fut).await?;
266            let conn = Connection::<CdpEventMessage>::connect_with_retries(
267                &debug_ws_url,
268                config.connection_retries,
269            )
270            .await?;
271            Ok((debug_ws_url, conn))
272        }
273
274        let (debug_ws_url, conn) = match with_child(&config, &mut child).await {
275            Ok(conn) => conn,
276            Err(e) => {
277                // An initialization error occurred, clean up the process
278                if let Ok(Some(_)) = child.try_wait() {
279                    // already exited, do nothing, may happen if the browser crashed
280                } else {
281                    // the process is still alive, kill it and wait for exit (avoid zombie processes)
282                    let _ = child.kill().await;
283                    let _ = child.wait().await;
284                }
285                return Err(e);
286            }
287        };
288
289        // Only infaillible calls are allowed after this point to avoid clean-up issues with the
290        // child process.
291
292        let (tx, rx) = channel(config.channel_capacity);
293
294        let handler_config = HandlerConfig {
295            ignore_https_errors: config.ignore_https_errors,
296            viewport: config.viewport.clone(),
297            context_ids: Vec::new(),
298            request_timeout: config.request_timeout,
299            request_intercept: config.request_intercept,
300            cache_enabled: config.cache_enabled,
301            ignore_visuals: config.ignore_visuals,
302            ignore_stylesheets: config.ignore_stylesheets,
303            ignore_javascript: config.ignore_javascript,
304            ignore_analytics: config.ignore_analytics,
305            ignore_prefetch: config.ignore_prefetch,
306            ignore_ads: config.ignore_ads,
307            extra_headers: config.extra_headers.clone(),
308            only_html: config.only_html,
309            service_worker_enabled: config.service_worker_enabled,
310            created_first_target: false,
311            intercept_manager: config.intercept_manager,
312            max_bytes_allowed: config.max_bytes_allowed,
313            max_redirects: config.max_redirects,
314            max_main_frame_navigations: config.max_main_frame_navigations,
315            whitelist_patterns: config.whitelist_patterns.clone(),
316            blacklist_patterns: config.blacklist_patterns.clone(),
317            #[cfg(feature = "adblock")]
318            adblock_filter_rules: config.adblock_filter_rules.clone(),
319            channel_capacity: config.channel_capacity,
320            page_channel_capacity: config.page_channel_capacity,
321            connection_retries: config.connection_retries,
322        };
323
324        let fut = Handler::new(conn, rx, handler_config);
325        let browser_context = fut.default_browser_context().clone();
326
327        let browser = Self {
328            sender: tx,
329            config: Some(config),
330            child: Some(child),
331            debug_ws_url,
332            browser_context,
333        };
334
335        Ok((browser, fut))
336    }
337
338    /// Request to fetch all existing browser targets.
339    ///
340    /// By default, only targets launched after the browser connection are tracked
341    /// when connecting to a existing browser instance with the devtools websocket url
342    /// This function fetches existing targets on the browser and adds them as pages internally
343    ///
344    /// The pages are not guaranteed to be ready as soon as the function returns
345    /// You should wait a few millis if you need to use a page
346    /// Returns [TargetInfo]
347    pub async fn fetch_targets(&mut self) -> Result<Vec<TargetInfo>> {
348        let (tx, rx) = oneshot_channel();
349
350        self.sender.send(HandlerMessage::FetchTargets(tx)).await?;
351
352        rx.await?
353    }
354
355    /// Request for the browser to close completely.
356    ///
357    /// If the browser was spawned by [`Browser::launch`], it is recommended to wait for the
358    /// spawned instance exit, to avoid "zombie" processes ([`Browser::wait`],
359    /// [`Browser::wait_sync`], [`Browser::try_wait`]).
360    /// [`Browser::drop`] waits automatically if needed.
361    pub async fn close(&self) -> Result<CloseReturns> {
362        let (tx, rx) = oneshot_channel();
363
364        self.sender.send(HandlerMessage::CloseBrowser(tx)).await?;
365
366        rx.await?
367    }
368
369    /// Asynchronously wait for the spawned chromium instance to exit completely.
370    ///
371    /// The instance is spawned by [`Browser::launch`]. `wait` is usually called after
372    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
373    /// "zombie" processes.
374    ///
375    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
376    /// connected to an existing browser through [`Browser::connect`])
377    pub async fn wait(&mut self) -> io::Result<Option<ExitStatus>> {
378        if let Some(child) = self.child.as_mut() {
379            Ok(Some(child.wait().await?))
380        } else {
381            Ok(None)
382        }
383    }
384
385    /// If the spawned chromium instance has completely exited, wait for it.
386    ///
387    /// The instance is spawned by [`Browser::launch`]. `try_wait` is usually called after
388    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
389    /// "zombie" processes.
390    ///
391    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
392    /// connected to an existing browser through [`Browser::connect`])
393    pub fn try_wait(&mut self) -> io::Result<Option<ExitStatus>> {
394        if let Some(child) = self.child.as_mut() {
395            child.try_wait()
396        } else {
397            Ok(None)
398        }
399    }
400
    /// Get mutable access to the spawned chromium instance.
    ///
    /// The instance is spawned by [`Browser::launch`]. The result is a [`async_process::Child`]
    /// value. It acts as a compat wrapper for an `async-std` or `tokio` child process.
    ///
    /// You may use [`async_process::Child::as_mut_inner`] to retrieve the concrete implementation
    /// for the selected runtime.
    ///
    /// Returns `None` if this [`Browser`] did not spawn any chromium instance (e.g.
    /// connected to an existing browser through [`Browser::connect`])
    pub fn get_mut_child(&mut self) -> Option<&mut Child> {
        self.child.as_mut()
    }
414
    /// Whether this [`Browser`] holds a child process handle.
    ///
    /// [`Browser::launch`] stores the spawned child, whereas
    /// [`Browser::connect_with_config`] stores none.
    pub fn has_child(&self) -> bool {
        self.child.is_some()
    }
419
420    /// Forcibly kill the spawned chromium instance
421    ///
422    /// The instance is spawned by [`Browser::launch`]. `kill` will automatically wait for the child
423    /// process to exit to avoid "zombie" processes.
424    ///
425    /// This method is provided to help if the browser does not close by itself. You should prefer
426    /// to use [`Browser::close`].
427    ///
428    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
429    /// connected to an existing browser through [`Browser::connect`])
430    pub async fn kill(&mut self) -> Option<io::Result<()>> {
431        match self.child.as_mut() {
432            Some(child) => Some(child.kill().await),
433            None => None,
434        }
435    }
436
437    /// If not launched as incognito this creates a new incognito browser
438    /// context. After that this browser exists within the incognito session.
439    /// New pages created while being in incognito mode will also run in the
440    /// incognito context. Incognito contexts won't share cookies/cache with
441    /// other browser contexts.
442    pub async fn start_incognito_context(&mut self) -> Result<&mut Self> {
443        if !self.is_incognito_configured() {
444            let browser_context_id = self
445                .create_browser_context(CreateBrowserContextParams::default())
446                .await?;
447            self.browser_context = BrowserContext::from(browser_context_id);
448            self.sender
449                .send(HandlerMessage::InsertContext(self.browser_context.clone()))
450                .await?;
451        }
452
453        Ok(self)
454    }
455
456    /// If a incognito session was created with
457    /// `Browser::start_incognito_context` this disposes this context.
458    ///
459    /// # Note This will also dispose all pages that were running within the
460    /// incognito context.
461    pub async fn quit_incognito_context_base(
462        &self,
463        browser_context_id: BrowserContextId,
464    ) -> Result<&Self> {
465        self.dispose_browser_context(browser_context_id.clone())
466            .await?;
467        self.sender
468            .send(HandlerMessage::DisposeContext(BrowserContext::from(
469                browser_context_id,
470            )))
471            .await?;
472        Ok(self)
473    }
474
475    /// If a incognito session was created with
476    /// `Browser::start_incognito_context` this disposes this context.
477    ///
478    /// # Note This will also dispose all pages that were running within the
479    /// incognito context.
480    pub async fn quit_incognito_context(&mut self) -> Result<&mut Self> {
481        if let Some(id) = self.browser_context.take() {
482            let _ = self.quit_incognito_context_base(id).await;
483        }
484        Ok(self)
485    }
486
487    /// Whether incognito mode was configured from the start
488    fn is_incognito_configured(&self) -> bool {
489        self.config
490            .as_ref()
491            .map(|c| c.incognito)
492            .unwrap_or_default()
493    }
494
    /// Returns the address of the websocket this browser is attached to.
    ///
    /// This is the url the connection was opened with: either the one read from
    /// the spawned browser's output or the one given to / resolved by `connect`.
    pub fn websocket_address(&self) -> &String {
        &self.debug_ws_url
    }
499
    /// Whether this browser operates in incognito mode: either incognito was
    /// requested in the stored config, or the current [`BrowserContext`]
    /// reports itself as incognito.
    pub fn is_incognito(&self) -> bool {
        self.is_incognito_configured() || self.browser_context.is_incognito()
    }
504
    /// The config associated with this browser, if any.
    ///
    /// [`Browser::launch`] stores the config the chromium instance was spawned
    /// with; [`Browser::connect_with_config`] stores one derived from the
    /// given `HandlerConfig`.
    pub fn config(&self) -> Option<&BrowserConfig> {
        self.config.as_ref()
    }
509
510    /// Create a new browser page
511    pub async fn new_page(&self, params: impl Into<CreateTargetParams>) -> Result<Page> {
512        let (tx, rx) = oneshot_channel();
513        let mut params = params.into();
514
515        if let Some(id) = self.browser_context.id() {
516            if params.browser_context_id.is_none() {
517                params.browser_context_id = Some(id.clone());
518            }
519        }
520
521        let _ = self
522            .sender
523            .send(HandlerMessage::CreatePage(params, tx))
524            .await;
525
526        rx.await?
527    }
528
529    /// Version information about the browser
530    pub async fn version(&self) -> Result<GetVersionReturns> {
531        Ok(self.execute(GetVersionParams::default()).await?.result)
532    }
533
534    /// Returns the user agent of the browser
535    pub async fn user_agent(&self) -> Result<String> {
536        Ok(self.version().await?.user_agent)
537    }
538
539    /// Call a browser method.
540    pub async fn execute<T: Command>(&self, cmd: T) -> Result<CommandResponse<T::Response>> {
541        let (tx, rx) = oneshot_channel();
542        let method = cmd.identifier();
543        let msg = CommandMessage::new(cmd, tx)?;
544
545        self.sender.send(HandlerMessage::Command(msg)).await?;
546        let resp = rx.await??;
547        to_command_response::<T>(resp, method)
548    }
549
550    /// Set permission settings for given embedding and embedded origins.
551    /// [PermissionDescriptor](https://chromedevtools.github.io/devtools-protocol/tot/Browser/#type-PermissionDescriptor)
552    /// [PermissionSetting](https://chromedevtools.github.io/devtools-protocol/tot/Browser/#type-PermissionSetting)
553    pub async fn set_permission(
554        &self,
555        permission: PermissionDescriptor,
556        setting: PermissionSetting,
557        origin: Option<impl Into<String>>,
558        embedded_origin: Option<impl Into<String>>,
559        browser_context_id: Option<BrowserContextId>,
560    ) -> Result<&Self> {
561        self.execute(SetPermissionParams {
562            permission,
563            setting,
564            origin: origin.map(Into::into),
565            embedded_origin: embedded_origin.map(Into::into),
566            browser_context_id: browser_context_id.or_else(|| self.browser_context.id.clone()),
567        })
568        .await?;
569        Ok(self)
570    }
571
572    /// Convenience: set a permission for a single origin using the current browser context.
573    pub async fn set_permission_for_origin(
574        &self,
575        origin: impl Into<String>,
576        embedded_origin: Option<impl Into<String>>,
577        permission: PermissionDescriptor,
578        setting: PermissionSetting,
579    ) -> Result<&Self> {
580        self.set_permission(permission, setting, Some(origin), embedded_origin, None)
581            .await
582    }
583
584    /// "Reset" a permission override by setting it back to Prompt.
585    pub async fn reset_permission_for_origin(
586        &self,
587        origin: impl Into<String>,
588        embedded_origin: Option<impl Into<String>>,
589        permission: PermissionDescriptor,
590    ) -> Result<&Self> {
591        self.set_permission_for_origin(
592            origin,
593            embedded_origin,
594            permission,
595            PermissionSetting::Prompt,
596        )
597        .await
598    }
599
600    /// "Grant" all permissions.
601    pub async fn grant_all_permission_for_origin(
602        &self,
603        origin: impl Into<String>,
604        embedded_origin: Option<impl Into<String>>,
605        permission: PermissionDescriptor,
606    ) -> Result<&Self> {
607        self.set_permission_for_origin(
608            origin,
609            embedded_origin,
610            permission,
611            PermissionSetting::Granted,
612        )
613        .await
614    }
615
616    /// "Deny" all permissions.
617    pub async fn deny_all_permission_for_origin(
618        &self,
619        origin: impl Into<String>,
620        embedded_origin: Option<impl Into<String>>,
621        permission: PermissionDescriptor,
622    ) -> Result<&Self> {
623        self.set_permission_for_origin(
624            origin,
625            embedded_origin,
626            permission,
627            PermissionSetting::Denied,
628        )
629        .await
630    }
631
632    /// Return all of the pages of the browser
633    pub async fn pages(&self) -> Result<Vec<Page>> {
634        let (tx, rx) = oneshot_channel();
635        self.sender.send(HandlerMessage::GetPages(tx)).await?;
636        Ok(rx.await?)
637    }
638
639    /// Return page of given target_id
640    pub async fn get_page(&self, target_id: TargetId) -> Result<Page> {
641        let (tx, rx) = oneshot_channel();
642        self.sender
643            .send(HandlerMessage::GetPage(target_id, tx))
644            .await?;
645        rx.await?.ok_or(CdpError::NotFound)
646    }
647
648    /// Set listener for browser event
649    pub async fn event_listener<T: IntoEventKind>(&self) -> Result<EventStream<T>> {
650        let (tx, rx) = unbounded_channel();
651        self.sender
652            .send(HandlerMessage::AddEventListener(
653                EventListenerRequest::new::<T>(tx),
654            ))
655            .await?;
656
657        Ok(EventStream::new(rx))
658    }
659
660    /// Creates a new empty browser context.
661    pub async fn create_browser_context(
662        &mut self,
663        params: CreateBrowserContextParams,
664    ) -> Result<BrowserContextId> {
665        let response = self.execute(params).await?;
666
667        Ok(response.result.browser_context_id)
668    }
669
670    /// Returns all browser contexts created with Target.createBrowserContext method.
671    pub async fn get_browser_contexts(
672        &mut self,
673        params: GetBrowserContextsParams,
674    ) -> Result<GetBrowserContextsReturns> {
675        let response = self.execute(params).await?;
676        Ok(response.result)
677    }
678
679    /// Send a new empty browser context.
680    pub async fn send_new_context(
681        &mut self,
682        browser_context_id: BrowserContextId,
683    ) -> Result<&Self> {
684        self.browser_context = BrowserContext::from(browser_context_id);
685        self.sender
686            .send(HandlerMessage::InsertContext(self.browser_context.clone()))
687            .await?;
688        Ok(self)
689    }
690
691    /// Deletes a browser context.
692    pub async fn dispose_browser_context(
693        &self,
694        browser_context_id: impl Into<BrowserContextId>,
695    ) -> Result<&Self> {
696        self.execute(DisposeBrowserContextParams::new(browser_context_id))
697            .await?;
698
699        Ok(self)
700    }
701
702    /// Clears cookies.
703    pub async fn clear_cookies(&self) -> Result<&Self> {
704        self.execute(ClearCookiesParams::default()).await?;
705        Ok(self)
706    }
707
708    /// Returns all browser cookies.
709    pub async fn get_cookies(&self) -> Result<Vec<Cookie>> {
710        let cmd = GetCookiesParams {
711            browser_context_id: self.browser_context.id.clone(),
712        };
713
714        Ok(self.execute(cmd).await?.result.cookies)
715    }
716
717    /// Sets given cookies.
718    pub async fn set_cookies(&self, mut cookies: Vec<CookieParam>) -> Result<&Self> {
719        for cookie in &mut cookies {
720            if let Some(url) = cookie.url.as_ref() {
721                crate::page::validate_cookie_url(url)?;
722            }
723        }
724
725        let mut cookies_param = SetCookiesParams::new(cookies);
726
727        cookies_param.browser_context_id = self.browser_context.id.clone();
728
729        self.execute(cookies_param).await?;
730        Ok(self)
731    }
732}
733
734impl Drop for Browser {
735    fn drop(&mut self) {
736        if let Some(child) = self.child.as_mut() {
737            if let Ok(Some(_)) = child.try_wait() {
738                // Already exited, do nothing. Usually occurs after using the method close or kill.
739            } else {
740                // We set the `kill_on_drop` property for the child process, so no need to explicitely
741                // kill it here. It can't really be done anyway since the method is async.
742                //
743                // On Unix, the process will be reaped in the background by the runtime automatically
744                // so it won't leave any resources locked. It is, however, a better practice for the user to
745                // do it himself since the runtime doesn't provide garantees as to when the reap occurs, so we
746                // warn him here.
747                tracing::warn!("Browser was not closed manually, it will be killed automatically in the background");
748            }
749        }
750    }
751}
752
753/// Resolve devtools WebSocket URL from the provided browser process
754///
755/// If an error occurs, it returns the browser's stderr output.
756///
757/// The URL resolution fails if:
758/// - [`CdpError::LaunchTimeout`]: `timeout_fut` completes, this corresponds to a timeout
759/// - [`CdpError::LaunchExit`]: the browser process exits (or is killed)
760/// - [`CdpError::LaunchIo`]: an input/output error occurs when await the process exit or reading
761///   the browser's stderr: end of stream, invalid UTF-8, other
762async fn ws_url_from_output(
763    child_process: &mut Child,
764    timeout_fut: impl Future<Output = ()> + Unpin,
765) -> Result<String> {
766    use tokio::io::AsyncBufReadExt;
767    let stderr = match child_process.stderr.take() {
768        Some(stderr) => stderr,
769        None => {
770            return Err(CdpError::LaunchIo(
771                io::Error::new(io::ErrorKind::NotFound, "browser process has no stderr"),
772                BrowserStderr::new(Vec::new()),
773            ));
774        }
775    };
776    let mut stderr_bytes = Vec::<u8>::new();
777    let mut buf = tokio::io::BufReader::new(stderr);
778    let mut timeout_fut = timeout_fut;
779    loop {
780        tokio::select! {
781            _ = &mut timeout_fut => return Err(CdpError::LaunchTimeout(BrowserStderr::new(stderr_bytes))),
782            exit_status = child_process.wait() => {
783                return Err(match exit_status {
784                    Err(e) => CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)),
785                    Ok(exit_status) => CdpError::LaunchExit(exit_status, BrowserStderr::new(stderr_bytes)),
786                })
787            },
788            read_res = buf.read_until(b'\n', &mut stderr_bytes) => {
789                match read_res {
790                    Err(e) => return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes))),
791                    Ok(byte_count) => {
792                        if byte_count == 0 {
793                            let e = io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected end of stream");
794                            return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
795                        }
796                        let start_offset = stderr_bytes.len() - byte_count;
797                        let new_bytes = &stderr_bytes[start_offset..];
798                        match std::str::from_utf8(new_bytes) {
799                            Err(_) => {
800                                let e = io::Error::new(io::ErrorKind::InvalidData, "stream did not contain valid UTF-8");
801                                return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
802                            }
803                            Ok(line) => {
804                                if let Some((_, ws)) = line.rsplit_once("listening on ") {
805                                    if ws.starts_with("ws") && ws.contains("devtools/browser") {
806                                        return Ok(ws.trim().to_string());
807                                    }
808                                }
809                            }
810                        }
811                    }
812                }
813            }
814        }
815    }
816}
817
/// Selects whether, and in which flavour, the browser is started headless.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum HeadlessMode {
    /// The "headful" mode: the browser runs with a visible window.
    False,
    /// The old headless mode. This is the default variant.
    #[default]
    True,
    /// The new headless mode.
    /// See also: https://developer.chrome.com/docs/chromium/new-headless
    New,
}
828
829#[derive(Debug, Clone, Default)]
830pub struct BrowserConfig {
831    /// Determines whether to run headless version of the browser. Defaults to
832    /// true.
833    headless: HeadlessMode,
834    /// Determines whether to run the browser with a sandbox.
835    sandbox: bool,
836    /// Launch the browser with a specific window width and height.
837    window_size: Option<(u32, u32)>,
838    /// Launch the browser with a specific debugging port.
839    port: u16,
840    /// Path for Chrome or Chromium.
841    ///
842    /// If unspecified, the create will try to automatically detect a suitable
843    /// binary.
844    executable: std::path::PathBuf,
845
846    /// A list of Chrome extensions to load.
847    ///
848    /// An extension should be a path to a folder containing the extension code.
849    /// CRX files cannot be used directly and must be first extracted.
850    ///
851    /// Note that Chrome does not support loading extensions in headless-mode.
852    /// See https://bugs.chromium.org/p/chromium/issues/detail?id=706008#c5
853    extensions: Vec<String>,
854
855    /// Environment variables to set for the Chromium process.
856    /// Passes value through to std::process::Command::envs.
857    pub process_envs: Option<HashMap<String, String>>,
858
859    /// Data dir for user data
860    pub user_data_dir: Option<PathBuf>,
861
862    /// Whether to launch the `Browser` in incognito mode.
863    incognito: bool,
864
865    /// Timeout duration for `Browser::launch`.
866    launch_timeout: Duration,
867
868    /// Ignore https errors, default is true.
869    ignore_https_errors: bool,
870    pub viewport: Option<Viewport>,
871    /// The duration after a request with no response should time out.
872    request_timeout: Duration,
873
874    /// Additional command line arguments to pass to the browser instance.
875    args: Vec<String>,
876
877    /// Whether to disable DEFAULT_ARGS or not, default is false.
878    disable_default_args: bool,
879
880    /// Whether to enable request interception.
881    pub request_intercept: bool,
882
883    /// Whether to enable cache.
884    pub cache_enabled: bool,
885    /// Whether to enable or disable Service Workers.
886    /// Disabling may reduce background network activity and caching effects.
887    pub service_worker_enabled: bool,
888    /// Whether to ignore image/visual requests during interception.
889    /// Can reduce bandwidth and speed up crawling when visuals are unnecessary.
890    pub ignore_visuals: bool,
891    /// Whether to ignore stylesheet (CSS) requests during interception.
892    /// Useful for content-only crawls.
893    pub ignore_stylesheets: bool,
894    /// Whether to ignore JavaScript requests during interception.
895    /// This still allows critical framework bundles to pass when applicable.
896    pub ignore_javascript: bool,
897    /// Whether to ignore analytics/telemetry requests during interception.
898    pub ignore_analytics: bool,
899    /// Ignore prefetch request.
900    pub ignore_prefetch: bool,
901    /// Whether to ignore ad network requests during interception.
902    pub ignore_ads: bool,
903    /// Extra headers.
904    pub extra_headers: Option<std::collections::HashMap<String, String>>,
905    /// Only html
906    pub only_html: bool,
907    /// The interception intercept manager.
908    pub intercept_manager: NetworkInterceptManager,
909    /// The max bytes to receive.
910    pub max_bytes_allowed: Option<u64>,
911    /// Cap on Document-type redirect hops before the navigation is aborted.
912    /// `None` disables enforcement; `Some(n)` mirrors `reqwest::redirect::Policy::limited(n)`.
913    pub max_redirects: Option<usize>,
914    /// Cap on main-frame cross-document navigations per `goto`. Defends against
915    /// JS / meta-refresh loops that bypass the HTTP redirect guard. `None`
916    /// disables the guard.
917    pub max_main_frame_navigations: Option<u32>,
918    /// Whitelist patterns to allow through the network.
919    pub whitelist_patterns: Option<Vec<String>>,
920    /// Blacklist patterns to block through the network.
921    pub blacklist_patterns: Option<Vec<String>>,
922    /// Extra ABP/uBO filter rules to load into the adblock engine (requires `adblock` feature).
923    /// These are merged with the built-in `ADBLOCK_PATTERNS` for richer blocking
924    /// (e.g. EasyList / EasyPrivacy content).
925    #[cfg(feature = "adblock")]
926    pub adblock_filter_rules: Option<Vec<String>>,
927    /// Capacity of the channel between browser handle and handler.
928    /// Defaults to 1000.
929    pub channel_capacity: usize,
930    /// Capacity of the per-page mpsc channel carrying `TargetMessage`s
931    /// from each `Page` to the handler. Defaults to 2048; override via
932    /// `page_channel_capacity(N)` on the builder. Values of `0` are
933    /// clamped to `1` at channel creation.
934    pub page_channel_capacity: usize,
935    /// Number of WebSocket connection retry attempts with exponential backoff.
936    /// Defaults to 4.
937    pub connection_retries: u32,
938}
939
940#[derive(Debug, Clone)]
941pub struct BrowserConfigBuilder {
942    /// Headless mode configuration for the browser.
943    headless: HeadlessMode,
944    /// Whether to run the browser with a sandbox.
945    sandbox: bool,
946    /// Optional initial browser window size `(width, height)`.
947    window_size: Option<(u32, u32)>,
948    /// DevTools debugging port to bind to.
949    port: u16,
950    /// Optional explicit path to the Chrome/Chromium executable.
951    /// If `None`, auto-detection may be attempted based on `executation_detection`.
952    executable: Option<PathBuf>,
953    /// Controls auto-detection behavior for finding a Chrome/Chromium binary.
954    executation_detection: DetectionOptions,
955    /// List of unpacked extensions (directories) to load at startup.
956    extensions: Vec<String>,
957    /// Environment variables to set on the spawned Chromium process.
958    process_envs: Option<HashMap<String, String>>,
959    /// User data directory to persist browser state, or `None` for ephemeral.
960    user_data_dir: Option<PathBuf>,
961    /// Whether to start the browser in incognito (off-the-record) mode.
962    incognito: bool,
963    /// Maximum time to wait for the browser to launch and become ready.
964    launch_timeout: Duration,
965    /// Whether to ignore HTTPS/TLS errors during navigation and requests.
966    ignore_https_errors: bool,
967    /// Default page viewport configuration applied on startup.
968    viewport: Option<Viewport>,
969    /// Timeout for individual network requests without response progress.
970    request_timeout: Duration,
971    /// Additional command-line flags passed directly to the browser process.
972    args: Vec<String>,
973    /// Disable the default argument set and use only the provided `args`.
974    disable_default_args: bool,
975    /// Enable Network.requestInterception for request filtering/handling.
976    request_intercept: bool,
977    /// Enable the browser cache for navigations and subresources.
978    cache_enabled: bool,
979    /// Enable/disable Service Workers.
980    service_worker_enabled: bool,
981    /// Drop image/visual requests when interception is enabled.
982    ignore_visuals: bool,
983    /// Drop ad network requests when interception is enabled.
984    ignore_ads: bool,
985    /// Drop JavaScript requests when interception is enabled.
986    ignore_javascript: bool,
987    /// Drop stylesheet (CSS) requests when interception is enabled.
988    ignore_stylesheets: bool,
989    /// Ignore prefetch domains.
990    ignore_prefetch: bool,
991    /// Drop analytics/telemetry requests when interception is enabled.
992    ignore_analytics: bool,
993    /// If `true`, limit fetching to HTML documents.
994    only_html: bool,
995    /// Extra HTTP headers to include with every request.
996    extra_headers: Option<std::collections::HashMap<String, String>>,
997    /// Network interception manager used to configure filtering behavior.
998    intercept_manager: NetworkInterceptManager,
999    /// Optional upper bound on bytes that may be received (per session/run).
1000    max_bytes_allowed: Option<u64>,
1001    /// Optional cap on Document redirect hops per navigation (`None` = disabled).
1002    max_redirects: Option<usize>,
1003    /// Optional cap on main-frame cross-document navigations per goto.
1004    max_main_frame_navigations: Option<u32>,
1005    /// Whitelist patterns to allow through the network.
1006    whitelist_patterns: Option<Vec<String>>,
1007    /// Blacklist patterns to block through the network.
1008    blacklist_patterns: Option<Vec<String>>,
1009    /// Extra ABP/uBO filter rules for the adblock engine.
1010    #[cfg(feature = "adblock")]
1011    adblock_filter_rules: Option<Vec<String>>,
1012    /// Capacity of the channel between browser handle and handler.
1013    channel_capacity: usize,
1014    /// Capacity of the per-page mpsc `TargetMessage` channel.
1015    page_channel_capacity: usize,
1016    /// Number of WebSocket connection retry attempts.
1017    connection_retries: u32,
1018}
1019
1020impl BrowserConfig {
1021    /// Browser builder default config.
1022    pub fn builder() -> BrowserConfigBuilder {
1023        BrowserConfigBuilder::default()
1024    }
1025
1026    /// Launch with the executable path.
1027    pub fn with_executable(path: impl AsRef<Path>) -> Self {
1028        // SAFETY: build() only fails when no executable is provided,
1029        // but we always provide one via chrome_executable().
1030        Self::builder().chrome_executable(path).build().unwrap()
1031    }
1032}
1033
1034impl Default for BrowserConfigBuilder {
1035    fn default() -> Self {
1036        Self {
1037            headless: HeadlessMode::True,
1038            sandbox: true,
1039            window_size: None,
1040            port: 0,
1041            executable: None,
1042            executation_detection: DetectionOptions::default(),
1043            extensions: Vec::new(),
1044            process_envs: None,
1045            user_data_dir: None,
1046            incognito: false,
1047            launch_timeout: Duration::from_millis(LAUNCH_TIMEOUT),
1048            ignore_https_errors: true,
1049            viewport: Some(Default::default()),
1050            request_timeout: Duration::from_millis(REQUEST_TIMEOUT),
1051            args: Vec::new(),
1052            disable_default_args: false,
1053            request_intercept: false,
1054            cache_enabled: true,
1055            ignore_visuals: false,
1056            ignore_ads: false,
1057            ignore_javascript: false,
1058            ignore_analytics: false,
1059            ignore_stylesheets: false,
1060            ignore_prefetch: true,
1061            only_html: false,
1062            extra_headers: Default::default(),
1063            service_worker_enabled: true,
1064            intercept_manager: NetworkInterceptManager::Unknown,
1065            max_bytes_allowed: None,
1066            max_redirects: None,
1067            max_main_frame_navigations: None,
1068            whitelist_patterns: None,
1069            blacklist_patterns: None,
1070            #[cfg(feature = "adblock")]
1071            adblock_filter_rules: None,
1072            channel_capacity: 4096,
1073            page_channel_capacity: crate::handler::page::DEFAULT_PAGE_CHANNEL_CAPACITY,
1074            connection_retries: crate::conn::DEFAULT_CONNECTION_RETRIES,
1075        }
1076    }
1077}
1078
1079impl BrowserConfigBuilder {
1080    /// Configure window size.
1081    pub fn window_size(mut self, width: u32, height: u32) -> Self {
1082        self.window_size = Some((width, height));
1083        self
1084    }
1085    /// Configure sandboxing.
1086    pub fn no_sandbox(mut self) -> Self {
1087        self.sandbox = false;
1088        self
1089    }
1090    /// Configure the launch to start non headless.
1091    pub fn with_head(mut self) -> Self {
1092        self.headless = HeadlessMode::False;
1093        self
1094    }
1095    /// Configure the launch with the new headless mode.
1096    pub fn new_headless_mode(mut self) -> Self {
1097        self.headless = HeadlessMode::New;
1098        self
1099    }
1100    /// Configure the launch with headless.
1101    pub fn headless_mode(mut self, mode: HeadlessMode) -> Self {
1102        self.headless = mode;
1103        self
1104    }
1105    /// Configure the launch in incognito.
1106    pub fn incognito(mut self) -> Self {
1107        self.incognito = true;
1108        self
1109    }
1110
1111    pub fn respect_https_errors(mut self) -> Self {
1112        self.ignore_https_errors = false;
1113        self
1114    }
1115
1116    pub fn port(mut self, port: u16) -> Self {
1117        self.port = port;
1118        self
1119    }
1120
1121    pub fn with_max_bytes_allowed(mut self, max_bytes_allowed: Option<u64>) -> Self {
1122        self.max_bytes_allowed = max_bytes_allowed;
1123        self
1124    }
1125
1126    /// Cap the number of Document-type redirect hops per navigation.
1127    ///
1128    /// `None` disables enforcement (default, preserves Chromium's own ~20-hop cap).
1129    /// `Some(n)` aborts once a navigation chain exceeds `n` by emitting
1130    /// `net::ERR_TOO_MANY_REDIRECTS` and calling `Page.stopLoading`.
1131    pub fn with_max_redirects(mut self, max_redirects: Option<usize>) -> Self {
1132        self.max_redirects = max_redirects;
1133        self
1134    }
1135
1136    /// Cap the number of main-frame cross-document navigations allowed per
1137    /// `goto` call.
1138    ///
1139    /// Defends against JS `location.href` / meta-refresh loops that bypass
1140    /// HTTP-level redirect detection — each hop looks like a fresh document
1141    /// to Chromium, so `with_max_redirects` alone cannot catch them. `None`
1142    /// disables the guard (default).
1143    pub fn with_max_main_frame_navigations(mut self, cap: Option<u32>) -> Self {
1144        self.max_main_frame_navigations = cap;
1145        self
1146    }
1147
1148    pub fn launch_timeout(mut self, timeout: Duration) -> Self {
1149        self.launch_timeout = timeout;
1150        self
1151    }
1152
1153    pub fn request_timeout(mut self, timeout: Duration) -> Self {
1154        self.request_timeout = timeout;
1155        self
1156    }
1157
1158    /// Configures the viewport of the browser, which defaults to `800x600`.
1159    /// `None` disables viewport emulation (i.e., it uses the browsers default
1160    /// configuration, which fills the available space. This is similar to what
1161    /// Playwright does when you provide `null` as the value of its `viewport`
1162    /// option).
1163    pub fn viewport(mut self, viewport: impl Into<Option<Viewport>>) -> Self {
1164        self.viewport = viewport.into();
1165        self
1166    }
1167
1168    pub fn user_data_dir(mut self, data_dir: impl AsRef<Path>) -> Self {
1169        self.user_data_dir = Some(data_dir.as_ref().to_path_buf());
1170        self
1171    }
1172
1173    pub fn chrome_executable(mut self, path: impl AsRef<Path>) -> Self {
1174        self.executable = Some(path.as_ref().to_path_buf());
1175        self
1176    }
1177
1178    pub fn chrome_detection(mut self, options: DetectionOptions) -> Self {
1179        self.executation_detection = options;
1180        self
1181    }
1182
1183    pub fn extension(mut self, extension: impl Into<String>) -> Self {
1184        self.extensions.push(extension.into());
1185        self
1186    }
1187
1188    pub fn extensions<I, S>(mut self, extensions: I) -> Self
1189    where
1190        I: IntoIterator<Item = S>,
1191        S: Into<String>,
1192    {
1193        for ext in extensions {
1194            self.extensions.push(ext.into());
1195        }
1196        self
1197    }
1198
1199    pub fn env(mut self, key: impl Into<String>, val: impl Into<String>) -> Self {
1200        self.process_envs
1201            .get_or_insert(HashMap::new())
1202            .insert(key.into(), val.into());
1203        self
1204    }
1205
1206    pub fn envs<I, K, V>(mut self, envs: I) -> Self
1207    where
1208        I: IntoIterator<Item = (K, V)>,
1209        K: Into<String>,
1210        V: Into<String>,
1211    {
1212        self.process_envs
1213            .get_or_insert(HashMap::new())
1214            .extend(envs.into_iter().map(|(k, v)| (k.into(), v.into())));
1215        self
1216    }
1217
1218    pub fn arg(mut self, arg: impl Into<String>) -> Self {
1219        self.args.push(arg.into());
1220        self
1221    }
1222
1223    pub fn args<I, S>(mut self, args: I) -> Self
1224    where
1225        I: IntoIterator<Item = S>,
1226        S: Into<String>,
1227    {
1228        for arg in args {
1229            self.args.push(arg.into());
1230        }
1231        self
1232    }
1233
1234    pub fn disable_default_args(mut self) -> Self {
1235        self.disable_default_args = true;
1236        self
1237    }
1238
1239    pub fn enable_request_intercept(mut self) -> Self {
1240        self.request_intercept = true;
1241        self
1242    }
1243
1244    pub fn disable_request_intercept(mut self) -> Self {
1245        self.request_intercept = false;
1246        self
1247    }
1248
1249    pub fn enable_cache(mut self) -> Self {
1250        self.cache_enabled = true;
1251        self
1252    }
1253
1254    pub fn disable_cache(mut self) -> Self {
1255        self.cache_enabled = false;
1256        self
1257    }
1258
1259    /// Set service worker enabled.
1260    pub fn set_service_worker_enabled(mut self, bypass: bool) -> Self {
1261        self.service_worker_enabled = bypass;
1262        self
1263    }
1264
1265    /// Set extra request headers.
1266    pub fn set_extra_headers(
1267        mut self,
1268        headers: Option<std::collections::HashMap<String, String>>,
1269    ) -> Self {
1270        self.extra_headers = headers;
1271        self
1272    }
1273
1274    /// Set whitelist patterns to allow through network interception allowing.
1275    pub fn set_whitelist_patterns(mut self, whitelist_patterns: Option<Vec<String>>) -> Self {
1276        self.whitelist_patterns = whitelist_patterns;
1277        self
1278    }
1279
1280    /// Set blacklist patterns to block through network interception.
1281    pub fn set_blacklist_patterns(mut self, blacklist_patterns: Option<Vec<String>>) -> Self {
1282        self.blacklist_patterns = blacklist_patterns;
1283        self
1284    }
1285
1286    /// Set extra ABP/uBO filter rules for the adblock engine.
1287    /// Pass EasyList/EasyPrivacy content lines for richer blocking coverage.
1288    #[cfg(feature = "adblock")]
1289    pub fn set_adblock_filter_rules(mut self, rules: Vec<String>) -> Self {
1290        self.adblock_filter_rules = Some(rules);
1291        self
1292    }
1293
1294    /// Set the capacity of the channel between browser handle and handler.
1295    /// Defaults to 1000.
1296    pub fn channel_capacity(mut self, capacity: usize) -> Self {
1297        self.channel_capacity = capacity;
1298        self
1299    }
1300
1301    /// Set the capacity of the per-page mpsc channel carrying
1302    /// `TargetMessage`s from each `Page` to the handler.
1303    ///
1304    /// Defaults to 2048 (the previous hard-coded value). Tune upward to
1305    /// absorb bursts of commands without pushing them onto the
1306    /// `CommandFuture` async-send fallback path; tune downward to apply
1307    /// back-pressure sooner. Values of `0` are clamped to `1` at channel
1308    /// creation time (tokio panics on a zero-capacity mpsc).
1309    pub fn page_channel_capacity(mut self, capacity: usize) -> Self {
1310        self.page_channel_capacity = capacity;
1311        self
1312    }
1313
1314    /// Set the number of WebSocket connection retry attempts with exponential backoff.
1315    /// Defaults to 4. Set to 0 for a single attempt with no retries.
1316    pub fn connection_retries(mut self, retries: u32) -> Self {
1317        self.connection_retries = retries;
1318        self
1319    }
1320
1321    /// Build the browser.
1322    pub fn build(self) -> std::result::Result<BrowserConfig, String> {
1323        let executable = if let Some(e) = self.executable {
1324            e
1325        } else {
1326            detection::default_executable(self.executation_detection)?
1327        };
1328
1329        Ok(BrowserConfig {
1330            headless: self.headless,
1331            sandbox: self.sandbox,
1332            window_size: self.window_size,
1333            port: self.port,
1334            executable,
1335            extensions: self.extensions,
1336            process_envs: self.process_envs,
1337            user_data_dir: self.user_data_dir,
1338            incognito: self.incognito,
1339            launch_timeout: self.launch_timeout,
1340            ignore_https_errors: self.ignore_https_errors,
1341            viewport: self.viewport,
1342            request_timeout: self.request_timeout,
1343            args: self.args,
1344            disable_default_args: self.disable_default_args,
1345            request_intercept: self.request_intercept,
1346            cache_enabled: self.cache_enabled,
1347            ignore_visuals: self.ignore_visuals,
1348            ignore_ads: self.ignore_ads,
1349            ignore_javascript: self.ignore_javascript,
1350            ignore_analytics: self.ignore_analytics,
1351            ignore_stylesheets: self.ignore_stylesheets,
1352            ignore_prefetch: self.ignore_prefetch,
1353            extra_headers: self.extra_headers,
1354            only_html: self.only_html,
1355            intercept_manager: self.intercept_manager,
1356            service_worker_enabled: self.service_worker_enabled,
1357            max_bytes_allowed: self.max_bytes_allowed,
1358            max_redirects: self.max_redirects,
1359            max_main_frame_navigations: self.max_main_frame_navigations,
1360            whitelist_patterns: self.whitelist_patterns,
1361            blacklist_patterns: self.blacklist_patterns,
1362            #[cfg(feature = "adblock")]
1363            adblock_filter_rules: self.adblock_filter_rules,
1364            channel_capacity: self.channel_capacity,
1365            page_channel_capacity: self.page_channel_capacity,
1366            connection_retries: self.connection_retries,
1367        })
1368    }
1369}
1370
1371impl BrowserConfig {
1372    pub fn launch(&self) -> io::Result<Child> {
1373        let mut cmd = async_process::Command::new(&self.executable);
1374
1375        if self.disable_default_args {
1376            cmd.args(&self.args);
1377        } else {
1378            cmd.args(DEFAULT_ARGS).args(&self.args);
1379        }
1380
1381        if !self
1382            .args
1383            .iter()
1384            .any(|arg| arg.contains("--remote-debugging-port="))
1385        {
1386            cmd.arg(format!("--remote-debugging-port={}", self.port));
1387        }
1388
1389        cmd.args(
1390            self.extensions
1391                .iter()
1392                .map(|e| format!("--load-extension={e}")),
1393        );
1394
1395        if let Some(ref user_data) = self.user_data_dir {
1396            cmd.arg(format!("--user-data-dir={}", user_data.display()));
1397        } else {
1398            // If the user did not specify a data directory, this would default to the systems default
1399            // data directory. In most cases, we would rather have a fresh instance of Chromium. Specify
1400            // a temp dir just for chromiumoxide instead.
1401            cmd.arg(format!(
1402                "--user-data-dir={}",
1403                std::env::temp_dir().join("chromiumoxide-runner").display()
1404            ));
1405        }
1406
1407        if let Some((width, height)) = self.window_size {
1408            cmd.arg(format!("--window-size={width},{height}"));
1409        }
1410
1411        if !self.sandbox {
1412            cmd.args(["--no-sandbox", "--disable-setuid-sandbox"]);
1413        }
1414
1415        match self.headless {
1416            HeadlessMode::False => (),
1417            HeadlessMode::True => {
1418                cmd.args(["--headless", "--hide-scrollbars", "--mute-audio"]);
1419            }
1420            HeadlessMode::New => {
1421                cmd.args(["--headless=new", "--hide-scrollbars", "--mute-audio"]);
1422            }
1423        }
1424
1425        if self.incognito {
1426            cmd.arg("--incognito");
1427        }
1428
1429        if let Some(ref envs) = self.process_envs {
1430            cmd.envs(envs);
1431        }
1432        cmd.stderr(Stdio::piped()).spawn()
1433    }
1434}
1435
1436/// Returns the path to Chrome's executable.
1437///
1438/// If the `CHROME` environment variable is set, `default_executable` will
1439/// use it as the default path. Otherwise, the filenames `google-chrome-stable`
1440/// `chromium`, `chromium-browser`, `chrome` and `chrome-browser` are
1441/// searched for in standard places. If that fails,
1442/// `/Applications/Google Chrome.app/...` (on MacOS) or the registry (on
1443/// Windows) is consulted. If all of the above fail, an error is returned.
1444#[deprecated(note = "Use detection::default_executable instead")]
1445pub fn default_executable() -> Result<std::path::PathBuf, String> {
1446    let options = DetectionOptions {
1447        msedge: false,
1448        unstable: false,
1449    };
1450    detection::default_executable(options)
1451}
1452
/// Command line switches handed to the Chrome binary on every launch, unless
/// the configuration disables the default arguments.
///
/// The list is derived from Puppeteer's launcher defaults:
/// https://github.com/puppeteer/puppeteer/blob/4846b8723cf20d3551c0d755df394cc5e0c82a94/src/node/Launcher.ts#L157
///
/// The array length is part of the type, so it must be updated whenever a
/// switch is added or removed.
static DEFAULT_ARGS: [&str; 26] = [
    "--disable-background-networking",
    "--enable-features=NetworkService,NetworkServiceInProcess",
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-breakpad",
    "--disable-client-side-phishing-detection",
    "--disable-component-extensions-with-background-pages",
    "--disable-default-apps",
    "--disable-dev-shm-usage",
    "--disable-extensions",
    "--disable-features=TranslateUI",
    "--disable-hang-monitor",
    "--disable-ipc-flooding-protection",
    "--disable-popup-blocking",
    "--disable-prompt-on-repost",
    "--disable-renderer-backgrounding",
    "--disable-sync",
    "--force-color-profile=srgb",
    "--metrics-recording-only",
    "--no-first-run",
    "--enable-automation",
    "--password-store=basic",
    "--use-mock-keychain",
    "--enable-blink-features=IdleDetection",
    "--lang=en_US",
    "--disable-blink-features=AutomationControlled",
];