Skip to main content

chromiumoxide/
browser.rs

1use hashbrown::HashMap;
2use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE};
3use std::future::Future;
4use std::time::Duration;
5use std::{
6    io,
7    path::{Path, PathBuf},
8};
9
10use tokio::sync::mpsc::{channel, unbounded_channel, Sender};
11use tokio::sync::oneshot::channel as oneshot_channel;
12
13use crate::async_process::{self, Child, ExitStatus, Stdio};
14use crate::cmd::{to_command_response, CommandMessage};
15use crate::conn::Connection;
16use crate::detection::{self, DetectionOptions};
17use crate::error::{BrowserStderr, CdpError, Result};
18use crate::handler::browser::BrowserContext;
19use crate::handler::viewport::Viewport;
20use crate::handler::{Handler, HandlerConfig, HandlerMessage, REQUEST_TIMEOUT};
21use crate::listeners::{EventListenerRequest, EventStream};
22use crate::page::Page;
23use crate::utils;
24use chromiumoxide_cdp::cdp::browser_protocol::browser::{
25    BrowserContextId, CloseReturns, GetVersionParams, GetVersionReturns,
26};
27use chromiumoxide_cdp::cdp::browser_protocol::browser::{
28    PermissionDescriptor, PermissionSetting, SetPermissionParams,
29};
30use chromiumoxide_cdp::cdp::browser_protocol::network::{Cookie, CookieParam};
31use chromiumoxide_cdp::cdp::browser_protocol::storage::{
32    ClearCookiesParams, GetCookiesParams, SetCookiesParams,
33};
34use chromiumoxide_cdp::cdp::browser_protocol::target::{
35    CreateBrowserContextParams, CreateTargetParams, DisposeBrowserContextParams,
36    GetBrowserContextsParams, GetBrowserContextsReturns, TargetId, TargetInfo,
37};
38
39use chromiumoxide_cdp::cdp::{CdpEventMessage, IntoEventKind};
40use chromiumoxide_types::*;
41use spider_network_blocker::intercept_manager::NetworkInterceptManager;
42
/// Default `Browser::launch` timeout, expressed in milliseconds.
pub const LAUNCH_TIMEOUT: u64 = 20 * 1_000;
45
46lazy_static::lazy_static! {
47    /// The request client to get the web socket url.
48    static ref REQUEST_CLIENT: reqwest::Client = reqwest::Client::builder()
49        .timeout(Duration::from_secs(60))
50        .default_headers({
51            let mut m = HeaderMap::new();
52
53            m.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
54
55            m
56        })
57        .tcp_keepalive(Some(Duration::from_secs(5)))
58        .pool_idle_timeout(Some(Duration::from_secs(60)))
59        .pool_max_idle_per_host(10)
60        .build()
61        .expect("client to build");
62}
63
64/// Returns chromey's global `reqwest::Client` for reuse by other subsystems
65/// (e.g. remote cache uploads via `spider_remote_cache`).
66pub fn request_client() -> &'static reqwest::Client {
67    &REQUEST_CLIENT
68}
69
70/// A [`Browser`] is created when chromiumoxide connects to a Chromium instance.
71#[derive(Debug)]
72pub struct Browser {
73    /// The `Sender` to send messages to the connection handler that drives the
74    /// websocket
75    pub(crate) sender: Sender<HandlerMessage>,
76    /// How the spawned chromium instance was configured, if any
77    config: Option<BrowserConfig>,
78    /// The spawned chromium instance
79    child: Option<Child>,
80    /// The debug web socket url of the chromium instance
81    debug_ws_url: String,
82    /// The context of the browser
83    pub browser_context: BrowserContext,
84}
85
86/// Browser connection information.
87#[derive(serde::Deserialize, Debug, Default)]
88pub struct BrowserConnection {
89    #[serde(rename = "Browser")]
90    /// The browser name
91    pub browser: String,
92    #[serde(rename = "Protocol-Version")]
93    /// Browser version
94    pub protocol_version: String,
95    #[serde(rename = "User-Agent")]
96    /// User Agent used by default.
97    pub user_agent: String,
98    #[serde(rename = "V8-Version")]
99    /// The v8 engine version
100    pub v8_version: String,
101    #[serde(rename = "WebKit-Version")]
102    /// Webkit version
103    pub webkit_version: String,
104    #[serde(rename = "webSocketDebuggerUrl")]
105    /// Remote debugging address
106    pub web_socket_debugger_url: String,
107}
108
109impl Browser {
110    /// Connect to an already running chromium instance via the given URL.
111    ///
112    /// If the URL is a http(s) URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
113    pub async fn connect(url: impl Into<String>) -> Result<(Self, Handler)> {
114        Self::connect_with_config(url, HandlerConfig::default()).await
115    }
116
117    // Connect to an already running chromium instance with a given `HandlerConfig`.
118    ///
119    /// If the URL is a http URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
120    pub async fn connect_with_config(
121        url: impl Into<String>,
122        config: HandlerConfig,
123    ) -> Result<(Self, Handler)> {
124        let mut debug_ws_url = url.into();
125        let retries = config.connection_retries;
126
127        if debug_ws_url.starts_with("http") {
128            let version_url = if debug_ws_url.ends_with("/json/version")
129                || debug_ws_url.ends_with("/json/version/")
130            {
131                debug_ws_url.to_owned()
132            } else {
133                format!(
134                    "{}{}json/version",
135                    &debug_ws_url,
136                    if debug_ws_url.ends_with('/') { "" } else { "/" }
137                )
138            };
139
140            let mut discovered = false;
141
142            for attempt in 0..=retries {
143                let retry = || async {
144                    if attempt < retries {
145                        let backoff_ms = 50u64 * 3u64.saturating_pow(attempt);
146                        tokio::time::sleep(Duration::from_millis(backoff_ms)).await;
147                    }
148                };
149
150                match REQUEST_CLIENT.get(&version_url).send().await {
151                    Ok(req) => match req.bytes().await {
152                        Ok(b) => {
153                            match crate::serde_json::from_slice::<Box<BrowserConnection>>(&b) {
154                                Ok(connection)
155                                    if !connection.web_socket_debugger_url.is_empty() =>
156                                {
157                                    debug_ws_url = connection.web_socket_debugger_url;
158                                    discovered = true;
159                                    break;
160                                }
161                                _ => {
162                                    // JSON parse failed or webSocketDebuggerUrl was empty — retry
163                                    retry().await;
164                                }
165                            }
166                        }
167                        Err(_) => {
168                            retry().await;
169                        }
170                    },
171                    Err(_) => {
172                        retry().await;
173                    }
174                }
175            }
176
177            if !discovered {
178                return Err(CdpError::NoResponse);
179            }
180        }
181
182        let conn =
183            Connection::<CdpEventMessage>::connect_with_retries(&debug_ws_url, retries).await?;
184
185        let (tx, rx) = channel(config.channel_capacity);
186
187        let handler_config = BrowserConfig {
188            ignore_https_errors: config.ignore_https_errors,
189            viewport: config.viewport.clone(),
190            request_timeout: config.request_timeout,
191            request_intercept: config.request_intercept,
192            cache_enabled: config.cache_enabled,
193            ignore_visuals: config.ignore_visuals,
194            ignore_stylesheets: config.ignore_stylesheets,
195            ignore_javascript: config.ignore_javascript,
196            ignore_analytics: config.ignore_analytics,
197            ignore_prefetch: config.ignore_prefetch,
198            ignore_ads: config.ignore_ads,
199            extra_headers: config.extra_headers.clone(),
200            only_html: config.only_html,
201            service_worker_enabled: config.service_worker_enabled,
202            intercept_manager: config.intercept_manager,
203            max_bytes_allowed: config.max_bytes_allowed,
204            max_redirects: config.max_redirects,
205            max_main_frame_navigations: config.max_main_frame_navigations,
206            whitelist_patterns: config.whitelist_patterns.clone(),
207            blacklist_patterns: config.blacklist_patterns.clone(),
208            ..Default::default()
209        };
210
211        let fut = Handler::new(conn, rx, config);
212        let browser_context = fut.default_browser_context().clone();
213
214        let browser = Self {
215            sender: tx,
216            config: Some(handler_config),
217            child: None,
218            debug_ws_url,
219            browser_context,
220        };
221
222        Ok((browser, fut))
223    }
224
225    /// Launches a new instance of `chromium` in the background and attaches to
226    /// its debug web socket.
227    ///
228    /// This fails when no chromium executable could be detected.
229    ///
230    /// This fails if no web socket url could be detected from the child
231    /// processes stderr for more than the configured `launch_timeout`
232    /// (20 seconds by default).
233    pub async fn launch(mut config: BrowserConfig) -> Result<(Self, Handler)> {
234        // Canonalize paths to reduce issues with sandboxing
235        config.executable = utils::canonicalize_except_snap(config.executable).await?;
236
237        // Launch a new chromium instance
238        let mut child = config.launch()?;
239
240        /// Faillible initialization to run once the child process is created.
241        ///
242        /// All faillible calls must be executed inside this function. This ensures that all
243        /// errors are caught and that the child process is properly cleaned-up.
244        async fn with_child(
245            config: &BrowserConfig,
246            child: &mut Child,
247        ) -> Result<(String, Connection<CdpEventMessage>)> {
248            let dur = config.launch_timeout;
249            let timeout_fut = Box::pin(tokio::time::sleep(dur));
250
251            // extract the ws:
252            let debug_ws_url = ws_url_from_output(child, timeout_fut).await?;
253            let conn = Connection::<CdpEventMessage>::connect_with_retries(
254                &debug_ws_url,
255                config.connection_retries,
256            )
257            .await?;
258            Ok((debug_ws_url, conn))
259        }
260
261        let (debug_ws_url, conn) = match with_child(&config, &mut child).await {
262            Ok(conn) => conn,
263            Err(e) => {
264                // An initialization error occurred, clean up the process
265                if let Ok(Some(_)) = child.try_wait() {
266                    // already exited, do nothing, may happen if the browser crashed
267                } else {
268                    // the process is still alive, kill it and wait for exit (avoid zombie processes)
269                    let _ = child.kill().await;
270                    let _ = child.wait().await;
271                }
272                return Err(e);
273            }
274        };
275
276        // Only infaillible calls are allowed after this point to avoid clean-up issues with the
277        // child process.
278
279        let (tx, rx) = channel(config.channel_capacity);
280
281        let handler_config = HandlerConfig {
282            ignore_https_errors: config.ignore_https_errors,
283            viewport: config.viewport.clone(),
284            context_ids: Vec::new(),
285            request_timeout: config.request_timeout,
286            request_intercept: config.request_intercept,
287            cache_enabled: config.cache_enabled,
288            ignore_visuals: config.ignore_visuals,
289            ignore_stylesheets: config.ignore_stylesheets,
290            ignore_javascript: config.ignore_javascript,
291            ignore_analytics: config.ignore_analytics,
292            ignore_prefetch: config.ignore_prefetch,
293            ignore_ads: config.ignore_ads,
294            extra_headers: config.extra_headers.clone(),
295            only_html: config.only_html,
296            service_worker_enabled: config.service_worker_enabled,
297            created_first_target: false,
298            intercept_manager: config.intercept_manager,
299            max_bytes_allowed: config.max_bytes_allowed,
300            max_redirects: config.max_redirects,
301            max_main_frame_navigations: config.max_main_frame_navigations,
302            whitelist_patterns: config.whitelist_patterns.clone(),
303            blacklist_patterns: config.blacklist_patterns.clone(),
304            #[cfg(feature = "adblock")]
305            adblock_filter_rules: config.adblock_filter_rules.clone(),
306            channel_capacity: config.channel_capacity,
307            page_channel_capacity: config.page_channel_capacity,
308            connection_retries: config.connection_retries,
309        };
310
311        let fut = Handler::new(conn, rx, handler_config);
312        let browser_context = fut.default_browser_context().clone();
313
314        let browser = Self {
315            sender: tx,
316            config: Some(config),
317            child: Some(child),
318            debug_ws_url,
319            browser_context,
320        };
321
322        Ok((browser, fut))
323    }
324
325    /// Request to fetch all existing browser targets.
326    ///
327    /// By default, only targets launched after the browser connection are tracked
328    /// when connecting to a existing browser instance with the devtools websocket url
329    /// This function fetches existing targets on the browser and adds them as pages internally
330    ///
331    /// The pages are not guaranteed to be ready as soon as the function returns
332    /// You should wait a few millis if you need to use a page
333    /// Returns [TargetInfo]
334    pub async fn fetch_targets(&mut self) -> Result<Vec<TargetInfo>> {
335        let (tx, rx) = oneshot_channel();
336
337        self.sender.send(HandlerMessage::FetchTargets(tx)).await?;
338
339        rx.await?
340    }
341
342    /// Request for the browser to close completely.
343    ///
344    /// If the browser was spawned by [`Browser::launch`], it is recommended to wait for the
345    /// spawned instance exit, to avoid "zombie" processes ([`Browser::wait`],
346    /// [`Browser::wait_sync`], [`Browser::try_wait`]).
347    /// [`Browser::drop`] waits automatically if needed.
348    pub async fn close(&self) -> Result<CloseReturns> {
349        let (tx, rx) = oneshot_channel();
350
351        self.sender.send(HandlerMessage::CloseBrowser(tx)).await?;
352
353        rx.await?
354    }
355
356    /// Asynchronously wait for the spawned chromium instance to exit completely.
357    ///
358    /// The instance is spawned by [`Browser::launch`]. `wait` is usually called after
359    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
360    /// "zombie" processes.
361    ///
362    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
363    /// connected to an existing browser through [`Browser::connect`])
364    pub async fn wait(&mut self) -> io::Result<Option<ExitStatus>> {
365        if let Some(child) = self.child.as_mut() {
366            Ok(Some(child.wait().await?))
367        } else {
368            Ok(None)
369        }
370    }
371
372    /// If the spawned chromium instance has completely exited, wait for it.
373    ///
374    /// The instance is spawned by [`Browser::launch`]. `try_wait` is usually called after
375    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
376    /// "zombie" processes.
377    ///
378    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
379    /// connected to an existing browser through [`Browser::connect`])
380    pub fn try_wait(&mut self) -> io::Result<Option<ExitStatus>> {
381        if let Some(child) = self.child.as_mut() {
382            child.try_wait()
383        } else {
384            Ok(None)
385        }
386    }
387
388    /// Get the spawned chromium instance
389    ///
390    /// The instance is spawned by [`Browser::launch`]. The result is a [`async_process::Child`]
391    /// value. It acts as a compat wrapper for an `async-std` or `tokio` child process.
392    ///
393    /// You may use [`async_process::Child::as_mut_inner`] to retrieve the concrete implementation
394    /// for the selected runtime.
395    ///
396    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
397    /// connected to an existing browser through [`Browser::connect`])
398    pub fn get_mut_child(&mut self) -> Option<&mut Child> {
399        self.child.as_mut()
400    }
401
402    /// Has a browser instance launched on system.
403    pub fn has_child(&self) -> bool {
404        self.child.is_some()
405    }
406
407    /// Forcibly kill the spawned chromium instance
408    ///
409    /// The instance is spawned by [`Browser::launch`]. `kill` will automatically wait for the child
410    /// process to exit to avoid "zombie" processes.
411    ///
412    /// This method is provided to help if the browser does not close by itself. You should prefer
413    /// to use [`Browser::close`].
414    ///
415    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
416    /// connected to an existing browser through [`Browser::connect`])
417    pub async fn kill(&mut self) -> Option<io::Result<()>> {
418        match self.child.as_mut() {
419            Some(child) => Some(child.kill().await),
420            None => None,
421        }
422    }
423
424    /// If not launched as incognito this creates a new incognito browser
425    /// context. After that this browser exists within the incognito session.
426    /// New pages created while being in incognito mode will also run in the
427    /// incognito context. Incognito contexts won't share cookies/cache with
428    /// other browser contexts.
429    pub async fn start_incognito_context(&mut self) -> Result<&mut Self> {
430        if !self.is_incognito_configured() {
431            let browser_context_id = self
432                .create_browser_context(CreateBrowserContextParams::default())
433                .await?;
434            self.browser_context = BrowserContext::from(browser_context_id);
435            self.sender
436                .send(HandlerMessage::InsertContext(self.browser_context.clone()))
437                .await?;
438        }
439
440        Ok(self)
441    }
442
443    /// If a incognito session was created with
444    /// `Browser::start_incognito_context` this disposes this context.
445    ///
446    /// # Note This will also dispose all pages that were running within the
447    /// incognito context.
448    pub async fn quit_incognito_context_base(
449        &self,
450        browser_context_id: BrowserContextId,
451    ) -> Result<&Self> {
452        self.dispose_browser_context(browser_context_id.clone())
453            .await?;
454        self.sender
455            .send(HandlerMessage::DisposeContext(BrowserContext::from(
456                browser_context_id,
457            )))
458            .await?;
459        Ok(self)
460    }
461
462    /// If a incognito session was created with
463    /// `Browser::start_incognito_context` this disposes this context.
464    ///
465    /// # Note This will also dispose all pages that were running within the
466    /// incognito context.
467    pub async fn quit_incognito_context(&mut self) -> Result<&mut Self> {
468        if let Some(id) = self.browser_context.take() {
469            let _ = self.quit_incognito_context_base(id).await;
470        }
471        Ok(self)
472    }
473
474    /// Whether incognito mode was configured from the start
475    fn is_incognito_configured(&self) -> bool {
476        self.config
477            .as_ref()
478            .map(|c| c.incognito)
479            .unwrap_or_default()
480    }
481
482    /// Returns the address of the websocket this browser is attached to
483    pub fn websocket_address(&self) -> &String {
484        &self.debug_ws_url
485    }
486
487    /// Whether the BrowserContext is incognito.
488    pub fn is_incognito(&self) -> bool {
489        self.is_incognito_configured() || self.browser_context.is_incognito()
490    }
491
492    /// The config of the spawned chromium instance if any.
493    pub fn config(&self) -> Option<&BrowserConfig> {
494        self.config.as_ref()
495    }
496
497    /// Create a new browser page
498    pub async fn new_page(&self, params: impl Into<CreateTargetParams>) -> Result<Page> {
499        let (tx, rx) = oneshot_channel();
500        let mut params = params.into();
501
502        if let Some(id) = self.browser_context.id() {
503            if params.browser_context_id.is_none() {
504                params.browser_context_id = Some(id.clone());
505            }
506        }
507
508        let _ = self
509            .sender
510            .send(HandlerMessage::CreatePage(params, tx))
511            .await;
512
513        rx.await?
514    }
515
516    /// Version information about the browser
517    pub async fn version(&self) -> Result<GetVersionReturns> {
518        Ok(self.execute(GetVersionParams::default()).await?.result)
519    }
520
521    /// Returns the user agent of the browser
522    pub async fn user_agent(&self) -> Result<String> {
523        Ok(self.version().await?.user_agent)
524    }
525
526    /// Call a browser method.
527    pub async fn execute<T: Command>(&self, cmd: T) -> Result<CommandResponse<T::Response>> {
528        let (tx, rx) = oneshot_channel();
529        let method = cmd.identifier();
530        let msg = CommandMessage::new(cmd, tx)?;
531
532        self.sender.send(HandlerMessage::Command(msg)).await?;
533        let resp = rx.await??;
534        to_command_response::<T>(resp, method)
535    }
536
537    /// Set permission settings for given embedding and embedded origins.
538    /// [PermissionDescriptor](https://chromedevtools.github.io/devtools-protocol/tot/Browser/#type-PermissionDescriptor)
539    /// [PermissionSetting](https://chromedevtools.github.io/devtools-protocol/tot/Browser/#type-PermissionSetting)
540    pub async fn set_permission(
541        &self,
542        permission: PermissionDescriptor,
543        setting: PermissionSetting,
544        origin: Option<impl Into<String>>,
545        embedded_origin: Option<impl Into<String>>,
546        browser_context_id: Option<BrowserContextId>,
547    ) -> Result<&Self> {
548        self.execute(SetPermissionParams {
549            permission,
550            setting,
551            origin: origin.map(Into::into),
552            embedded_origin: embedded_origin.map(Into::into),
553            browser_context_id: browser_context_id.or_else(|| self.browser_context.id.clone()),
554        })
555        .await?;
556        Ok(self)
557    }
558
559    /// Convenience: set a permission for a single origin using the current browser context.
560    pub async fn set_permission_for_origin(
561        &self,
562        origin: impl Into<String>,
563        embedded_origin: Option<impl Into<String>>,
564        permission: PermissionDescriptor,
565        setting: PermissionSetting,
566    ) -> Result<&Self> {
567        self.set_permission(permission, setting, Some(origin), embedded_origin, None)
568            .await
569    }
570
571    /// "Reset" a permission override by setting it back to Prompt.
572    pub async fn reset_permission_for_origin(
573        &self,
574        origin: impl Into<String>,
575        embedded_origin: Option<impl Into<String>>,
576        permission: PermissionDescriptor,
577    ) -> Result<&Self> {
578        self.set_permission_for_origin(
579            origin,
580            embedded_origin,
581            permission,
582            PermissionSetting::Prompt,
583        )
584        .await
585    }
586
587    /// "Grant" all permissions.
588    pub async fn grant_all_permission_for_origin(
589        &self,
590        origin: impl Into<String>,
591        embedded_origin: Option<impl Into<String>>,
592        permission: PermissionDescriptor,
593    ) -> Result<&Self> {
594        self.set_permission_for_origin(
595            origin,
596            embedded_origin,
597            permission,
598            PermissionSetting::Granted,
599        )
600        .await
601    }
602
603    /// "Deny" all permissions.
604    pub async fn deny_all_permission_for_origin(
605        &self,
606        origin: impl Into<String>,
607        embedded_origin: Option<impl Into<String>>,
608        permission: PermissionDescriptor,
609    ) -> Result<&Self> {
610        self.set_permission_for_origin(
611            origin,
612            embedded_origin,
613            permission,
614            PermissionSetting::Denied,
615        )
616        .await
617    }
618
619    /// Return all of the pages of the browser
620    pub async fn pages(&self) -> Result<Vec<Page>> {
621        let (tx, rx) = oneshot_channel();
622        self.sender.send(HandlerMessage::GetPages(tx)).await?;
623        Ok(rx.await?)
624    }
625
626    /// Return page of given target_id
627    pub async fn get_page(&self, target_id: TargetId) -> Result<Page> {
628        let (tx, rx) = oneshot_channel();
629        self.sender
630            .send(HandlerMessage::GetPage(target_id, tx))
631            .await?;
632        rx.await?.ok_or(CdpError::NotFound)
633    }
634
635    /// Set listener for browser event
636    pub async fn event_listener<T: IntoEventKind>(&self) -> Result<EventStream<T>> {
637        let (tx, rx) = unbounded_channel();
638        self.sender
639            .send(HandlerMessage::AddEventListener(
640                EventListenerRequest::new::<T>(tx),
641            ))
642            .await?;
643
644        Ok(EventStream::new(rx))
645    }
646
647    /// Creates a new empty browser context.
648    pub async fn create_browser_context(
649        &mut self,
650        params: CreateBrowserContextParams,
651    ) -> Result<BrowserContextId> {
652        let response = self.execute(params).await?;
653
654        Ok(response.result.browser_context_id)
655    }
656
657    /// Returns all browser contexts created with Target.createBrowserContext method.
658    pub async fn get_browser_contexts(
659        &mut self,
660        params: GetBrowserContextsParams,
661    ) -> Result<GetBrowserContextsReturns> {
662        let response = self.execute(params).await?;
663        Ok(response.result)
664    }
665
666    /// Send a new empty browser context.
667    pub async fn send_new_context(
668        &mut self,
669        browser_context_id: BrowserContextId,
670    ) -> Result<&Self> {
671        self.browser_context = BrowserContext::from(browser_context_id);
672        self.sender
673            .send(HandlerMessage::InsertContext(self.browser_context.clone()))
674            .await?;
675        Ok(self)
676    }
677
678    /// Deletes a browser context.
679    pub async fn dispose_browser_context(
680        &self,
681        browser_context_id: impl Into<BrowserContextId>,
682    ) -> Result<&Self> {
683        self.execute(DisposeBrowserContextParams::new(browser_context_id))
684            .await?;
685
686        Ok(self)
687    }
688
689    /// Clears cookies.
690    pub async fn clear_cookies(&self) -> Result<&Self> {
691        self.execute(ClearCookiesParams::default()).await?;
692        Ok(self)
693    }
694
695    /// Returns all browser cookies.
696    pub async fn get_cookies(&self) -> Result<Vec<Cookie>> {
697        let cmd = GetCookiesParams {
698            browser_context_id: self.browser_context.id.clone(),
699        };
700
701        Ok(self.execute(cmd).await?.result.cookies)
702    }
703
704    /// Sets given cookies.
705    pub async fn set_cookies(&self, mut cookies: Vec<CookieParam>) -> Result<&Self> {
706        for cookie in &mut cookies {
707            if let Some(url) = cookie.url.as_ref() {
708                crate::page::validate_cookie_url(url)?;
709            }
710        }
711
712        let mut cookies_param = SetCookiesParams::new(cookies);
713
714        cookies_param.browser_context_id = self.browser_context.id.clone();
715
716        self.execute(cookies_param).await?;
717        Ok(self)
718    }
719}
720
721impl Drop for Browser {
722    fn drop(&mut self) {
723        if let Some(child) = self.child.as_mut() {
724            if let Ok(Some(_)) = child.try_wait() {
725                // Already exited, do nothing. Usually occurs after using the method close or kill.
726            } else {
727                // We set the `kill_on_drop` property for the child process, so no need to explicitely
728                // kill it here. It can't really be done anyway since the method is async.
729                //
730                // On Unix, the process will be reaped in the background by the runtime automatically
731                // so it won't leave any resources locked. It is, however, a better practice for the user to
732                // do it himself since the runtime doesn't provide garantees as to when the reap occurs, so we
733                // warn him here.
734                tracing::warn!("Browser was not closed manually, it will be killed automatically in the background");
735            }
736        }
737    }
738}
739
740/// Resolve devtools WebSocket URL from the provided browser process
741///
742/// If an error occurs, it returns the browser's stderr output.
743///
744/// The URL resolution fails if:
745/// - [`CdpError::LaunchTimeout`]: `timeout_fut` completes, this corresponds to a timeout
746/// - [`CdpError::LaunchExit`]: the browser process exits (or is killed)
747/// - [`CdpError::LaunchIo`]: an input/output error occurs when await the process exit or reading
748///   the browser's stderr: end of stream, invalid UTF-8, other
749async fn ws_url_from_output(
750    child_process: &mut Child,
751    timeout_fut: impl Future<Output = ()> + Unpin,
752) -> Result<String> {
753    use tokio::io::AsyncBufReadExt;
754    let stderr = match child_process.stderr.take() {
755        Some(stderr) => stderr,
756        None => {
757            return Err(CdpError::LaunchIo(
758                io::Error::new(io::ErrorKind::NotFound, "browser process has no stderr"),
759                BrowserStderr::new(Vec::new()),
760            ));
761        }
762    };
763    let mut stderr_bytes = Vec::<u8>::new();
764    let mut buf = tokio::io::BufReader::new(stderr);
765    let mut timeout_fut = timeout_fut;
766    loop {
767        tokio::select! {
768            _ = &mut timeout_fut => return Err(CdpError::LaunchTimeout(BrowserStderr::new(stderr_bytes))),
769            exit_status = child_process.wait() => {
770                return Err(match exit_status {
771                    Err(e) => CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)),
772                    Ok(exit_status) => CdpError::LaunchExit(exit_status, BrowserStderr::new(stderr_bytes)),
773                })
774            },
775            read_res = buf.read_until(b'\n', &mut stderr_bytes) => {
776                match read_res {
777                    Err(e) => return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes))),
778                    Ok(byte_count) => {
779                        if byte_count == 0 {
780                            let e = io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected end of stream");
781                            return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
782                        }
783                        let start_offset = stderr_bytes.len() - byte_count;
784                        let new_bytes = &stderr_bytes[start_offset..];
785                        match std::str::from_utf8(new_bytes) {
786                            Err(_) => {
787                                let e = io::Error::new(io::ErrorKind::InvalidData, "stream did not contain valid UTF-8");
788                                return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
789                            }
790                            Ok(line) => {
791                                if let Some((_, ws)) = line.rsplit_once("listening on ") {
792                                    if ws.starts_with("ws") && ws.contains("devtools/browser") {
793                                        return Ok(ws.trim().to_string());
794                                    }
795                                }
796                            }
797                        }
798                    }
799                }
800            }
801        }
802    }
803}
804
/// Selects whether the browser is launched with a visible window or in one
/// of the headless modes.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum HeadlessMode {
    /// The "headful" mode: no headless flag is requested.
    False,
    /// The old headless mode. This is the default variant.
    #[default]
    True,
    /// The new headless mode.
    ///
    /// See also: <https://developer.chrome.com/docs/chromium/new-headless>
    New,
}
815
816#[derive(Debug, Clone, Default)]
817pub struct BrowserConfig {
818    /// Determines whether to run headless version of the browser. Defaults to
819    /// true.
820    headless: HeadlessMode,
821    /// Determines whether to run the browser with a sandbox.
822    sandbox: bool,
823    /// Launch the browser with a specific window width and height.
824    window_size: Option<(u32, u32)>,
825    /// Launch the browser with a specific debugging port.
826    port: u16,
827    /// Path for Chrome or Chromium.
828    ///
829    /// If unspecified, the create will try to automatically detect a suitable
830    /// binary.
831    executable: std::path::PathBuf,
832
833    /// A list of Chrome extensions to load.
834    ///
835    /// An extension should be a path to a folder containing the extension code.
836    /// CRX files cannot be used directly and must be first extracted.
837    ///
838    /// Note that Chrome does not support loading extensions in headless-mode.
839    /// See https://bugs.chromium.org/p/chromium/issues/detail?id=706008#c5
840    extensions: Vec<String>,
841
842    /// Environment variables to set for the Chromium process.
843    /// Passes value through to std::process::Command::envs.
844    pub process_envs: Option<HashMap<String, String>>,
845
846    /// Data dir for user data
847    pub user_data_dir: Option<PathBuf>,
848
849    /// Whether to launch the `Browser` in incognito mode.
850    incognito: bool,
851
852    /// Timeout duration for `Browser::launch`.
853    launch_timeout: Duration,
854
855    /// Ignore https errors, default is true.
856    ignore_https_errors: bool,
857    pub viewport: Option<Viewport>,
858    /// The duration after a request with no response should time out.
859    request_timeout: Duration,
860
861    /// Additional command line arguments to pass to the browser instance.
862    args: Vec<String>,
863
864    /// Whether to disable DEFAULT_ARGS or not, default is false.
865    disable_default_args: bool,
866
867    /// Whether to enable request interception.
868    pub request_intercept: bool,
869
870    /// Whether to enable cache.
871    pub cache_enabled: bool,
872    /// Whether to enable or disable Service Workers.
873    /// Disabling may reduce background network activity and caching effects.
874    pub service_worker_enabled: bool,
875    /// Whether to ignore image/visual requests during interception.
876    /// Can reduce bandwidth and speed up crawling when visuals are unnecessary.
877    pub ignore_visuals: bool,
878    /// Whether to ignore stylesheet (CSS) requests during interception.
879    /// Useful for content-only crawls.
880    pub ignore_stylesheets: bool,
881    /// Whether to ignore JavaScript requests during interception.
882    /// This still allows critical framework bundles to pass when applicable.
883    pub ignore_javascript: bool,
884    /// Whether to ignore analytics/telemetry requests during interception.
885    pub ignore_analytics: bool,
886    /// Ignore prefetch request.
887    pub ignore_prefetch: bool,
888    /// Whether to ignore ad network requests during interception.
889    pub ignore_ads: bool,
890    /// Extra headers.
891    pub extra_headers: Option<std::collections::HashMap<String, String>>,
892    /// Only html
893    pub only_html: bool,
894    /// The interception intercept manager.
895    pub intercept_manager: NetworkInterceptManager,
896    /// The max bytes to receive.
897    pub max_bytes_allowed: Option<u64>,
898    /// Cap on Document-type redirect hops before the navigation is aborted.
899    /// `None` disables enforcement; `Some(n)` mirrors `reqwest::redirect::Policy::limited(n)`.
900    pub max_redirects: Option<usize>,
901    /// Cap on main-frame cross-document navigations per `goto`. Defends against
902    /// JS / meta-refresh loops that bypass the HTTP redirect guard. `None`
903    /// disables the guard.
904    pub max_main_frame_navigations: Option<u32>,
905    /// Whitelist patterns to allow through the network.
906    pub whitelist_patterns: Option<Vec<String>>,
907    /// Blacklist patterns to block through the network.
908    pub blacklist_patterns: Option<Vec<String>>,
909    /// Extra ABP/uBO filter rules to load into the adblock engine (requires `adblock` feature).
910    /// These are merged with the built-in `ADBLOCK_PATTERNS` for richer blocking
911    /// (e.g. EasyList / EasyPrivacy content).
912    #[cfg(feature = "adblock")]
913    pub adblock_filter_rules: Option<Vec<String>>,
914    /// Capacity of the channel between browser handle and handler.
915    /// Defaults to 1000.
916    pub channel_capacity: usize,
917    /// Capacity of the per-page mpsc channel carrying `TargetMessage`s
918    /// from each `Page` to the handler. Defaults to 2048; override via
919    /// `page_channel_capacity(N)` on the builder. Values of `0` are
920    /// clamped to `1` at channel creation.
921    pub page_channel_capacity: usize,
922    /// Number of WebSocket connection retry attempts with exponential backoff.
923    /// Defaults to 4.
924    pub connection_retries: u32,
925}
926
927#[derive(Debug, Clone)]
928pub struct BrowserConfigBuilder {
929    /// Headless mode configuration for the browser.
930    headless: HeadlessMode,
931    /// Whether to run the browser with a sandbox.
932    sandbox: bool,
933    /// Optional initial browser window size `(width, height)`.
934    window_size: Option<(u32, u32)>,
935    /// DevTools debugging port to bind to.
936    port: u16,
937    /// Optional explicit path to the Chrome/Chromium executable.
938    /// If `None`, auto-detection may be attempted based on `executation_detection`.
939    executable: Option<PathBuf>,
940    /// Controls auto-detection behavior for finding a Chrome/Chromium binary.
941    executation_detection: DetectionOptions,
942    /// List of unpacked extensions (directories) to load at startup.
943    extensions: Vec<String>,
944    /// Environment variables to set on the spawned Chromium process.
945    process_envs: Option<HashMap<String, String>>,
946    /// User data directory to persist browser state, or `None` for ephemeral.
947    user_data_dir: Option<PathBuf>,
948    /// Whether to start the browser in incognito (off-the-record) mode.
949    incognito: bool,
950    /// Maximum time to wait for the browser to launch and become ready.
951    launch_timeout: Duration,
952    /// Whether to ignore HTTPS/TLS errors during navigation and requests.
953    ignore_https_errors: bool,
954    /// Default page viewport configuration applied on startup.
955    viewport: Option<Viewport>,
956    /// Timeout for individual network requests without response progress.
957    request_timeout: Duration,
958    /// Additional command-line flags passed directly to the browser process.
959    args: Vec<String>,
960    /// Disable the default argument set and use only the provided `args`.
961    disable_default_args: bool,
962    /// Enable Network.requestInterception for request filtering/handling.
963    request_intercept: bool,
964    /// Enable the browser cache for navigations and subresources.
965    cache_enabled: bool,
966    /// Enable/disable Service Workers.
967    service_worker_enabled: bool,
968    /// Drop image/visual requests when interception is enabled.
969    ignore_visuals: bool,
970    /// Drop ad network requests when interception is enabled.
971    ignore_ads: bool,
972    /// Drop JavaScript requests when interception is enabled.
973    ignore_javascript: bool,
974    /// Drop stylesheet (CSS) requests when interception is enabled.
975    ignore_stylesheets: bool,
976    /// Ignore prefetch domains.
977    ignore_prefetch: bool,
978    /// Drop analytics/telemetry requests when interception is enabled.
979    ignore_analytics: bool,
980    /// If `true`, limit fetching to HTML documents.
981    only_html: bool,
982    /// Extra HTTP headers to include with every request.
983    extra_headers: Option<std::collections::HashMap<String, String>>,
984    /// Network interception manager used to configure filtering behavior.
985    intercept_manager: NetworkInterceptManager,
986    /// Optional upper bound on bytes that may be received (per session/run).
987    max_bytes_allowed: Option<u64>,
988    /// Optional cap on Document redirect hops per navigation (`None` = disabled).
989    max_redirects: Option<usize>,
990    /// Optional cap on main-frame cross-document navigations per goto.
991    max_main_frame_navigations: Option<u32>,
992    /// Whitelist patterns to allow through the network.
993    whitelist_patterns: Option<Vec<String>>,
994    /// Blacklist patterns to block through the network.
995    blacklist_patterns: Option<Vec<String>>,
996    /// Extra ABP/uBO filter rules for the adblock engine.
997    #[cfg(feature = "adblock")]
998    adblock_filter_rules: Option<Vec<String>>,
999    /// Capacity of the channel between browser handle and handler.
1000    channel_capacity: usize,
1001    /// Capacity of the per-page mpsc `TargetMessage` channel.
1002    page_channel_capacity: usize,
1003    /// Number of WebSocket connection retry attempts.
1004    connection_retries: u32,
1005}
1006
1007impl BrowserConfig {
1008    /// Browser builder default config.
1009    pub fn builder() -> BrowserConfigBuilder {
1010        BrowserConfigBuilder::default()
1011    }
1012
1013    /// Launch with the executable path.
1014    pub fn with_executable(path: impl AsRef<Path>) -> Self {
1015        // SAFETY: build() only fails when no executable is provided,
1016        // but we always provide one via chrome_executable().
1017        Self::builder().chrome_executable(path).build().unwrap()
1018    }
1019}
1020
1021impl Default for BrowserConfigBuilder {
1022    fn default() -> Self {
1023        Self {
1024            headless: HeadlessMode::True,
1025            sandbox: true,
1026            window_size: None,
1027            port: 0,
1028            executable: None,
1029            executation_detection: DetectionOptions::default(),
1030            extensions: Vec::new(),
1031            process_envs: None,
1032            user_data_dir: None,
1033            incognito: false,
1034            launch_timeout: Duration::from_millis(LAUNCH_TIMEOUT),
1035            ignore_https_errors: true,
1036            viewport: Some(Default::default()),
1037            request_timeout: Duration::from_millis(REQUEST_TIMEOUT),
1038            args: Vec::new(),
1039            disable_default_args: false,
1040            request_intercept: false,
1041            cache_enabled: true,
1042            ignore_visuals: false,
1043            ignore_ads: false,
1044            ignore_javascript: false,
1045            ignore_analytics: false,
1046            ignore_stylesheets: false,
1047            ignore_prefetch: true,
1048            only_html: false,
1049            extra_headers: Default::default(),
1050            service_worker_enabled: true,
1051            intercept_manager: NetworkInterceptManager::Unknown,
1052            max_bytes_allowed: None,
1053            max_redirects: None,
1054            max_main_frame_navigations: None,
1055            whitelist_patterns: None,
1056            blacklist_patterns: None,
1057            #[cfg(feature = "adblock")]
1058            adblock_filter_rules: None,
1059            channel_capacity: 4096,
1060            page_channel_capacity: crate::handler::page::DEFAULT_PAGE_CHANNEL_CAPACITY,
1061            connection_retries: crate::conn::DEFAULT_CONNECTION_RETRIES,
1062        }
1063    }
1064}
1065
1066impl BrowserConfigBuilder {
1067    /// Configure window size.
1068    pub fn window_size(mut self, width: u32, height: u32) -> Self {
1069        self.window_size = Some((width, height));
1070        self
1071    }
1072    /// Configure sandboxing.
1073    pub fn no_sandbox(mut self) -> Self {
1074        self.sandbox = false;
1075        self
1076    }
1077    /// Configure the launch to start non headless.
1078    pub fn with_head(mut self) -> Self {
1079        self.headless = HeadlessMode::False;
1080        self
1081    }
1082    /// Configure the launch with the new headless mode.
1083    pub fn new_headless_mode(mut self) -> Self {
1084        self.headless = HeadlessMode::New;
1085        self
1086    }
1087    /// Configure the launch with headless.
1088    pub fn headless_mode(mut self, mode: HeadlessMode) -> Self {
1089        self.headless = mode;
1090        self
1091    }
1092    /// Configure the launch in incognito.
1093    pub fn incognito(mut self) -> Self {
1094        self.incognito = true;
1095        self
1096    }
1097
1098    pub fn respect_https_errors(mut self) -> Self {
1099        self.ignore_https_errors = false;
1100        self
1101    }
1102
1103    pub fn port(mut self, port: u16) -> Self {
1104        self.port = port;
1105        self
1106    }
1107
1108    pub fn with_max_bytes_allowed(mut self, max_bytes_allowed: Option<u64>) -> Self {
1109        self.max_bytes_allowed = max_bytes_allowed;
1110        self
1111    }
1112
1113    /// Cap the number of Document-type redirect hops per navigation.
1114    ///
1115    /// `None` disables enforcement (default, preserves Chromium's own ~20-hop cap).
1116    /// `Some(n)` aborts once a navigation chain exceeds `n` by emitting
1117    /// `net::ERR_TOO_MANY_REDIRECTS` and calling `Page.stopLoading`.
1118    pub fn with_max_redirects(mut self, max_redirects: Option<usize>) -> Self {
1119        self.max_redirects = max_redirects;
1120        self
1121    }
1122
1123    /// Cap the number of main-frame cross-document navigations allowed per
1124    /// `goto` call.
1125    ///
1126    /// Defends against JS `location.href` / meta-refresh loops that bypass
1127    /// HTTP-level redirect detection — each hop looks like a fresh document
1128    /// to Chromium, so `with_max_redirects` alone cannot catch them. `None`
1129    /// disables the guard (default).
1130    pub fn with_max_main_frame_navigations(mut self, cap: Option<u32>) -> Self {
1131        self.max_main_frame_navigations = cap;
1132        self
1133    }
1134
1135    pub fn launch_timeout(mut self, timeout: Duration) -> Self {
1136        self.launch_timeout = timeout;
1137        self
1138    }
1139
1140    pub fn request_timeout(mut self, timeout: Duration) -> Self {
1141        self.request_timeout = timeout;
1142        self
1143    }
1144
1145    /// Configures the viewport of the browser, which defaults to `800x600`.
1146    /// `None` disables viewport emulation (i.e., it uses the browsers default
1147    /// configuration, which fills the available space. This is similar to what
1148    /// Playwright does when you provide `null` as the value of its `viewport`
1149    /// option).
1150    pub fn viewport(mut self, viewport: impl Into<Option<Viewport>>) -> Self {
1151        self.viewport = viewport.into();
1152        self
1153    }
1154
1155    pub fn user_data_dir(mut self, data_dir: impl AsRef<Path>) -> Self {
1156        self.user_data_dir = Some(data_dir.as_ref().to_path_buf());
1157        self
1158    }
1159
1160    pub fn chrome_executable(mut self, path: impl AsRef<Path>) -> Self {
1161        self.executable = Some(path.as_ref().to_path_buf());
1162        self
1163    }
1164
1165    pub fn chrome_detection(mut self, options: DetectionOptions) -> Self {
1166        self.executation_detection = options;
1167        self
1168    }
1169
1170    pub fn extension(mut self, extension: impl Into<String>) -> Self {
1171        self.extensions.push(extension.into());
1172        self
1173    }
1174
1175    pub fn extensions<I, S>(mut self, extensions: I) -> Self
1176    where
1177        I: IntoIterator<Item = S>,
1178        S: Into<String>,
1179    {
1180        for ext in extensions {
1181            self.extensions.push(ext.into());
1182        }
1183        self
1184    }
1185
1186    pub fn env(mut self, key: impl Into<String>, val: impl Into<String>) -> Self {
1187        self.process_envs
1188            .get_or_insert(HashMap::new())
1189            .insert(key.into(), val.into());
1190        self
1191    }
1192
1193    pub fn envs<I, K, V>(mut self, envs: I) -> Self
1194    where
1195        I: IntoIterator<Item = (K, V)>,
1196        K: Into<String>,
1197        V: Into<String>,
1198    {
1199        self.process_envs
1200            .get_or_insert(HashMap::new())
1201            .extend(envs.into_iter().map(|(k, v)| (k.into(), v.into())));
1202        self
1203    }
1204
1205    pub fn arg(mut self, arg: impl Into<String>) -> Self {
1206        self.args.push(arg.into());
1207        self
1208    }
1209
1210    pub fn args<I, S>(mut self, args: I) -> Self
1211    where
1212        I: IntoIterator<Item = S>,
1213        S: Into<String>,
1214    {
1215        for arg in args {
1216            self.args.push(arg.into());
1217        }
1218        self
1219    }
1220
1221    pub fn disable_default_args(mut self) -> Self {
1222        self.disable_default_args = true;
1223        self
1224    }
1225
1226    pub fn enable_request_intercept(mut self) -> Self {
1227        self.request_intercept = true;
1228        self
1229    }
1230
1231    pub fn disable_request_intercept(mut self) -> Self {
1232        self.request_intercept = false;
1233        self
1234    }
1235
1236    pub fn enable_cache(mut self) -> Self {
1237        self.cache_enabled = true;
1238        self
1239    }
1240
1241    pub fn disable_cache(mut self) -> Self {
1242        self.cache_enabled = false;
1243        self
1244    }
1245
1246    /// Set service worker enabled.
1247    pub fn set_service_worker_enabled(mut self, bypass: bool) -> Self {
1248        self.service_worker_enabled = bypass;
1249        self
1250    }
1251
1252    /// Set extra request headers.
1253    pub fn set_extra_headers(
1254        mut self,
1255        headers: Option<std::collections::HashMap<String, String>>,
1256    ) -> Self {
1257        self.extra_headers = headers;
1258        self
1259    }
1260
1261    /// Set whitelist patterns to allow through network interception allowing.
1262    pub fn set_whitelist_patterns(mut self, whitelist_patterns: Option<Vec<String>>) -> Self {
1263        self.whitelist_patterns = whitelist_patterns;
1264        self
1265    }
1266
1267    /// Set blacklist patterns to block through network interception.
1268    pub fn set_blacklist_patterns(mut self, blacklist_patterns: Option<Vec<String>>) -> Self {
1269        self.blacklist_patterns = blacklist_patterns;
1270        self
1271    }
1272
1273    /// Set extra ABP/uBO filter rules for the adblock engine.
1274    /// Pass EasyList/EasyPrivacy content lines for richer blocking coverage.
1275    #[cfg(feature = "adblock")]
1276    pub fn set_adblock_filter_rules(mut self, rules: Vec<String>) -> Self {
1277        self.adblock_filter_rules = Some(rules);
1278        self
1279    }
1280
1281    /// Set the capacity of the channel between browser handle and handler.
1282    /// Defaults to 1000.
1283    pub fn channel_capacity(mut self, capacity: usize) -> Self {
1284        self.channel_capacity = capacity;
1285        self
1286    }
1287
1288    /// Set the capacity of the per-page mpsc channel carrying
1289    /// `TargetMessage`s from each `Page` to the handler.
1290    ///
1291    /// Defaults to 2048 (the previous hard-coded value). Tune upward to
1292    /// absorb bursts of commands without pushing them onto the
1293    /// `CommandFuture` async-send fallback path; tune downward to apply
1294    /// back-pressure sooner. Values of `0` are clamped to `1` at channel
1295    /// creation time (tokio panics on a zero-capacity mpsc).
1296    pub fn page_channel_capacity(mut self, capacity: usize) -> Self {
1297        self.page_channel_capacity = capacity;
1298        self
1299    }
1300
1301    /// Set the number of WebSocket connection retry attempts with exponential backoff.
1302    /// Defaults to 4. Set to 0 for a single attempt with no retries.
1303    pub fn connection_retries(mut self, retries: u32) -> Self {
1304        self.connection_retries = retries;
1305        self
1306    }
1307
1308    /// Build the browser.
1309    pub fn build(self) -> std::result::Result<BrowserConfig, String> {
1310        let executable = if let Some(e) = self.executable {
1311            e
1312        } else {
1313            detection::default_executable(self.executation_detection)?
1314        };
1315
1316        Ok(BrowserConfig {
1317            headless: self.headless,
1318            sandbox: self.sandbox,
1319            window_size: self.window_size,
1320            port: self.port,
1321            executable,
1322            extensions: self.extensions,
1323            process_envs: self.process_envs,
1324            user_data_dir: self.user_data_dir,
1325            incognito: self.incognito,
1326            launch_timeout: self.launch_timeout,
1327            ignore_https_errors: self.ignore_https_errors,
1328            viewport: self.viewport,
1329            request_timeout: self.request_timeout,
1330            args: self.args,
1331            disable_default_args: self.disable_default_args,
1332            request_intercept: self.request_intercept,
1333            cache_enabled: self.cache_enabled,
1334            ignore_visuals: self.ignore_visuals,
1335            ignore_ads: self.ignore_ads,
1336            ignore_javascript: self.ignore_javascript,
1337            ignore_analytics: self.ignore_analytics,
1338            ignore_stylesheets: self.ignore_stylesheets,
1339            ignore_prefetch: self.ignore_prefetch,
1340            extra_headers: self.extra_headers,
1341            only_html: self.only_html,
1342            intercept_manager: self.intercept_manager,
1343            service_worker_enabled: self.service_worker_enabled,
1344            max_bytes_allowed: self.max_bytes_allowed,
1345            max_redirects: self.max_redirects,
1346            max_main_frame_navigations: self.max_main_frame_navigations,
1347            whitelist_patterns: self.whitelist_patterns,
1348            blacklist_patterns: self.blacklist_patterns,
1349            #[cfg(feature = "adblock")]
1350            adblock_filter_rules: self.adblock_filter_rules,
1351            channel_capacity: self.channel_capacity,
1352            page_channel_capacity: self.page_channel_capacity,
1353            connection_retries: self.connection_retries,
1354        })
1355    }
1356}
1357
1358impl BrowserConfig {
1359    pub fn launch(&self) -> io::Result<Child> {
1360        let mut cmd = async_process::Command::new(&self.executable);
1361
1362        if self.disable_default_args {
1363            cmd.args(&self.args);
1364        } else {
1365            cmd.args(DEFAULT_ARGS).args(&self.args);
1366        }
1367
1368        if !self
1369            .args
1370            .iter()
1371            .any(|arg| arg.contains("--remote-debugging-port="))
1372        {
1373            cmd.arg(format!("--remote-debugging-port={}", self.port));
1374        }
1375
1376        cmd.args(
1377            self.extensions
1378                .iter()
1379                .map(|e| format!("--load-extension={e}")),
1380        );
1381
1382        if let Some(ref user_data) = self.user_data_dir {
1383            cmd.arg(format!("--user-data-dir={}", user_data.display()));
1384        } else {
1385            // If the user did not specify a data directory, this would default to the systems default
1386            // data directory. In most cases, we would rather have a fresh instance of Chromium. Specify
1387            // a temp dir just for chromiumoxide instead.
1388            cmd.arg(format!(
1389                "--user-data-dir={}",
1390                std::env::temp_dir().join("chromiumoxide-runner").display()
1391            ));
1392        }
1393
1394        if let Some((width, height)) = self.window_size {
1395            cmd.arg(format!("--window-size={width},{height}"));
1396        }
1397
1398        if !self.sandbox {
1399            cmd.args(["--no-sandbox", "--disable-setuid-sandbox"]);
1400        }
1401
1402        match self.headless {
1403            HeadlessMode::False => (),
1404            HeadlessMode::True => {
1405                cmd.args(["--headless", "--hide-scrollbars", "--mute-audio"]);
1406            }
1407            HeadlessMode::New => {
1408                cmd.args(["--headless=new", "--hide-scrollbars", "--mute-audio"]);
1409            }
1410        }
1411
1412        if self.incognito {
1413            cmd.arg("--incognito");
1414        }
1415
1416        if let Some(ref envs) = self.process_envs {
1417            cmd.envs(envs);
1418        }
1419        cmd.stderr(Stdio::piped()).spawn()
1420    }
1421}
1422
1423/// Returns the path to Chrome's executable.
1424///
1425/// If the `CHROME` environment variable is set, `default_executable` will
1426/// use it as the default path. Otherwise, the filenames `google-chrome-stable`
1427/// `chromium`, `chromium-browser`, `chrome` and `chrome-browser` are
1428/// searched for in standard places. If that fails,
1429/// `/Applications/Google Chrome.app/...` (on MacOS) or the registry (on
1430/// Windows) is consulted. If all of the above fail, an error is returned.
1431#[deprecated(note = "Use detection::default_executable instead")]
1432pub fn default_executable() -> Result<std::path::PathBuf, String> {
1433    let options = DetectionOptions {
1434        msedge: false,
1435        unstable: false,
1436    };
1437    detection::default_executable(options)
1438}
1439
/// Command-line switches handed to the Chrome binary by default.
///
/// `BrowserConfig::launch` passes them, in this order and ahead of any
/// user-supplied arguments, unless default arguments were disabled.
///
/// Via <https://github.com/puppeteer/puppeteer/blob/4846b8723cf20d3551c0d755df394cc5e0c82a94/src/node/Launcher.ts#L157>
static DEFAULT_ARGS: [&str; 26] = [
    "--disable-background-networking",
    "--enable-features=NetworkService,NetworkServiceInProcess",
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-breakpad",
    "--disable-client-side-phishing-detection",
    "--disable-component-extensions-with-background-pages",
    "--disable-default-apps",
    "--disable-dev-shm-usage",
    "--disable-extensions",
    "--disable-features=TranslateUI",
    "--disable-hang-monitor",
    "--disable-ipc-flooding-protection",
    "--disable-popup-blocking",
    "--disable-prompt-on-repost",
    "--disable-renderer-backgrounding",
    "--disable-sync",
    "--force-color-profile=srgb",
    "--metrics-recording-only",
    "--no-first-run",
    "--enable-automation",
    "--password-store=basic",
    "--use-mock-keychain",
    "--enable-blink-features=IdleDetection",
    "--lang=en_US",
    "--disable-blink-features=AutomationControlled",
];