Skip to main content

chromiumoxide/
browser.rs

1use hashbrown::HashMap;
2use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE};
3use std::future::Future;
4use std::time::Duration;
5use std::{
6    io,
7    path::{Path, PathBuf},
8};
9
10use tokio::sync::mpsc::{channel, unbounded_channel, Sender};
11use tokio::sync::oneshot::channel as oneshot_channel;
12
13use crate::async_process::{self, Child, ExitStatus, Stdio};
14use crate::cmd::{to_command_response, CommandMessage};
15use crate::conn::Connection;
16use crate::detection::{self, DetectionOptions};
17use crate::error::{BrowserStderr, CdpError, Result};
18use crate::handler::browser::BrowserContext;
19use crate::handler::viewport::Viewport;
20use crate::handler::{Handler, HandlerConfig, HandlerMessage, REQUEST_TIMEOUT};
21use crate::listeners::{EventListenerRequest, EventStream};
22use crate::page::Page;
23use crate::utils;
24use chromiumoxide_cdp::cdp::browser_protocol::browser::{
25    BrowserContextId, CloseReturns, GetVersionParams, GetVersionReturns,
26};
27use chromiumoxide_cdp::cdp::browser_protocol::browser::{
28    PermissionDescriptor, PermissionSetting, SetPermissionParams,
29};
30use chromiumoxide_cdp::cdp::browser_protocol::network::{Cookie, CookieParam};
31use chromiumoxide_cdp::cdp::browser_protocol::storage::{
32    ClearCookiesParams, GetCookiesParams, SetCookiesParams,
33};
34use chromiumoxide_cdp::cdp::browser_protocol::target::{
35    CreateBrowserContextParams, CreateTargetParams, DisposeBrowserContextParams,
36    GetBrowserContextsParams, GetBrowserContextsReturns, TargetId, TargetInfo,
37};
38
39use chromiumoxide_cdp::cdp::{CdpEventMessage, IntoEventKind};
40use chromiumoxide_types::*;
41use spider_network_blocker::intercept_manager::NetworkInterceptManager;
42
/// Default `Browser::launch` timeout, expressed in milliseconds (20 seconds).
pub const LAUNCH_TIMEOUT: u64 = 20_000;
45
46lazy_static::lazy_static! {
47    /// The request client to get the web socket url.
48    static ref REQUEST_CLIENT: reqwest::Client = reqwest::Client::builder()
49        .timeout(Duration::from_secs(60))
50        .default_headers({
51            let mut m = HeaderMap::new();
52
53            m.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
54
55            m
56        })
57        .tcp_keepalive(Some(Duration::from_secs(5)))
58        .pool_idle_timeout(Some(Duration::from_secs(60)))
59        .pool_max_idle_per_host(10)
60        .build()
61        .expect("client to build");
62}
63
64/// Returns chromey's global `reqwest::Client` for reuse by other subsystems
65/// (e.g. remote cache uploads via `spider_remote_cache`).
66pub fn request_client() -> &'static reqwest::Client {
67    &REQUEST_CLIENT
68}
69
/// A [`Browser`] is created when chromiumoxide connects to a Chromium instance.
///
/// It is obtained either by attaching to a running instance
/// ([`Browser::connect`]) or by spawning one ([`Browser::launch`]). Both
/// constructors return the `Browser` together with the `Handler` that was
/// built around the websocket connection.
#[derive(Debug)]
pub struct Browser {
    /// The `Sender` to send messages to the connection handler that drives the
    /// websocket
    pub(crate) sender: Sender<HandlerMessage>,
    /// How the chromium instance was configured, if any. `launch` stores the
    /// configuration it was given; `connect_with_config` stores a
    /// `BrowserConfig` mirrored from the `HandlerConfig`.
    config: Option<BrowserConfig>,
    /// The spawned chromium instance (`None` when attached to an existing browser)
    child: Option<Child>,
    /// The debug web socket url of the chromium instance
    debug_ws_url: String,
    /// The browser context this handle currently operates in
    pub browser_context: BrowserContext,
}
85
/// Browser connection information, deserialized from the JSON body returned by
/// the browser's `json/version` endpoint.
#[derive(serde::Deserialize, Debug, Default)]
pub struct BrowserConnection {
    #[serde(rename = "Browser")]
    /// The browser name
    pub browser: String,
    #[serde(rename = "Protocol-Version")]
    /// The protocol version reported by the browser
    pub protocol_version: String,
    #[serde(rename = "User-Agent")]
    /// User Agent used by default.
    pub user_agent: String,
    #[serde(rename = "V8-Version")]
    /// The v8 engine version
    pub v8_version: String,
    #[serde(rename = "WebKit-Version")]
    /// Webkit version
    pub webkit_version: String,
    #[serde(rename = "webSocketDebuggerUrl")]
    /// Remote debugging (websocket) address
    pub web_socket_debugger_url: String,
}
108
109impl Browser {
110    /// Connect to an already running chromium instance via the given URL.
111    ///
112    /// If the URL is a http(s) URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
113    pub async fn connect(url: impl Into<String>) -> Result<(Self, Handler)> {
114        Self::connect_with_config(url, HandlerConfig::default()).await
115    }
116
117    // Connect to an already running chromium instance with a given `HandlerConfig`.
118    ///
119    /// If the URL is a http URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
120    pub async fn connect_with_config(
121        url: impl Into<String>,
122        config: HandlerConfig,
123    ) -> Result<(Self, Handler)> {
124        let mut debug_ws_url = url.into();
125        let retries = config.connection_retries;
126
127        if debug_ws_url.starts_with("http") {
128            let version_url = if debug_ws_url.ends_with("/json/version")
129                || debug_ws_url.ends_with("/json/version/")
130            {
131                debug_ws_url.to_owned()
132            } else {
133                format!(
134                    "{}{}json/version",
135                    &debug_ws_url,
136                    if debug_ws_url.ends_with('/') { "" } else { "/" }
137                )
138            };
139
140            let mut discovered = false;
141
142            for attempt in 0..=retries {
143                let retry = || async {
144                    if attempt < retries {
145                        let backoff_ms = 50u64 * 3u64.saturating_pow(attempt);
146                        tokio::time::sleep(Duration::from_millis(backoff_ms)).await;
147                    }
148                };
149
150                match REQUEST_CLIENT.get(&version_url).send().await {
151                    Ok(req) => match req.bytes().await {
152                        Ok(b) => {
153                            match crate::serde_json::from_slice::<Box<BrowserConnection>>(&b) {
154                                Ok(connection)
155                                    if !connection.web_socket_debugger_url.is_empty() =>
156                                {
157                                    debug_ws_url = connection.web_socket_debugger_url;
158                                    discovered = true;
159                                    break;
160                                }
161                                _ => {
162                                    // JSON parse failed or webSocketDebuggerUrl was empty — retry
163                                    retry().await;
164                                }
165                            }
166                        }
167                        Err(_) => {
168                            retry().await;
169                        }
170                    },
171                    Err(_) => {
172                        retry().await;
173                    }
174                }
175            }
176
177            if !discovered {
178                return Err(CdpError::NoResponse);
179            }
180        }
181
182        let conn =
183            Connection::<CdpEventMessage>::connect_with_retries(&debug_ws_url, retries).await?;
184
185        let (tx, rx) = channel(config.channel_capacity);
186
187        let handler_config = BrowserConfig {
188            ignore_https_errors: config.ignore_https_errors,
189            viewport: config.viewport.clone(),
190            request_timeout: config.request_timeout,
191            request_intercept: config.request_intercept,
192            cache_enabled: config.cache_enabled,
193            ignore_visuals: config.ignore_visuals,
194            ignore_stylesheets: config.ignore_stylesheets,
195            ignore_javascript: config.ignore_javascript,
196            ignore_analytics: config.ignore_analytics,
197            ignore_prefetch: config.ignore_prefetch,
198            ignore_ads: config.ignore_ads,
199            extra_headers: config.extra_headers.clone(),
200            only_html: config.only_html,
201            service_worker_enabled: config.service_worker_enabled,
202            intercept_manager: config.intercept_manager,
203            max_bytes_allowed: config.max_bytes_allowed,
204            max_redirects: config.max_redirects,
205            max_main_frame_navigations: config.max_main_frame_navigations,
206            whitelist_patterns: config.whitelist_patterns.clone(),
207            blacklist_patterns: config.blacklist_patterns.clone(),
208            ..Default::default()
209        };
210
211        let fut = Handler::new(conn, rx, config);
212        let browser_context = fut.default_browser_context().clone();
213
214        let browser = Self {
215            sender: tx,
216            config: Some(handler_config),
217            child: None,
218            debug_ws_url,
219            browser_context,
220        };
221
222        Ok((browser, fut))
223    }
224
225    /// Launches a new instance of `chromium` in the background and attaches to
226    /// its debug web socket.
227    ///
228    /// This fails when no chromium executable could be detected.
229    ///
230    /// This fails if no web socket url could be detected from the child
231    /// processes stderr for more than the configured `launch_timeout`
232    /// (20 seconds by default).
233    pub async fn launch(mut config: BrowserConfig) -> Result<(Self, Handler)> {
234        // Eagerly initialize the background cleanup worker in this
235        // runtime so that later `Drop` calls on CDP streams / temp
236        // files (from `bg_cleanup::submit`) land on a live receiver.
237        // This is a single atomic load after the first call — safe
238        // and cheap to invoke on every `launch`.
239        crate::bg_cleanup::init_worker();
240
241        // Canonalize paths to reduce issues with sandboxing
242        config.executable = utils::canonicalize_except_snap(config.executable).await?;
243
244        // Launch a new chromium instance
245        let mut child = config.launch()?;
246
247        /// Faillible initialization to run once the child process is created.
248        ///
249        /// All faillible calls must be executed inside this function. This ensures that all
250        /// errors are caught and that the child process is properly cleaned-up.
251        async fn with_child(
252            config: &BrowserConfig,
253            child: &mut Child,
254        ) -> Result<(String, Connection<CdpEventMessage>)> {
255            let dur = config.launch_timeout;
256            let timeout_fut = Box::pin(tokio::time::sleep(dur));
257
258            // extract the ws:
259            let debug_ws_url = ws_url_from_output(child, timeout_fut).await?;
260            let conn = Connection::<CdpEventMessage>::connect_with_retries(
261                &debug_ws_url,
262                config.connection_retries,
263            )
264            .await?;
265            Ok((debug_ws_url, conn))
266        }
267
268        let (debug_ws_url, conn) = match with_child(&config, &mut child).await {
269            Ok(conn) => conn,
270            Err(e) => {
271                // An initialization error occurred, clean up the process
272                if let Ok(Some(_)) = child.try_wait() {
273                    // already exited, do nothing, may happen if the browser crashed
274                } else {
275                    // the process is still alive, kill it and wait for exit (avoid zombie processes)
276                    let _ = child.kill().await;
277                    let _ = child.wait().await;
278                }
279                return Err(e);
280            }
281        };
282
283        // Only infaillible calls are allowed after this point to avoid clean-up issues with the
284        // child process.
285
286        let (tx, rx) = channel(config.channel_capacity);
287
288        let handler_config = HandlerConfig {
289            ignore_https_errors: config.ignore_https_errors,
290            viewport: config.viewport.clone(),
291            context_ids: Vec::new(),
292            request_timeout: config.request_timeout,
293            request_intercept: config.request_intercept,
294            cache_enabled: config.cache_enabled,
295            ignore_visuals: config.ignore_visuals,
296            ignore_stylesheets: config.ignore_stylesheets,
297            ignore_javascript: config.ignore_javascript,
298            ignore_analytics: config.ignore_analytics,
299            ignore_prefetch: config.ignore_prefetch,
300            ignore_ads: config.ignore_ads,
301            extra_headers: config.extra_headers.clone(),
302            only_html: config.only_html,
303            service_worker_enabled: config.service_worker_enabled,
304            created_first_target: false,
305            intercept_manager: config.intercept_manager,
306            max_bytes_allowed: config.max_bytes_allowed,
307            max_redirects: config.max_redirects,
308            max_main_frame_navigations: config.max_main_frame_navigations,
309            whitelist_patterns: config.whitelist_patterns.clone(),
310            blacklist_patterns: config.blacklist_patterns.clone(),
311            #[cfg(feature = "adblock")]
312            adblock_filter_rules: config.adblock_filter_rules.clone(),
313            channel_capacity: config.channel_capacity,
314            page_channel_capacity: config.page_channel_capacity,
315            connection_retries: config.connection_retries,
316        };
317
318        let fut = Handler::new(conn, rx, handler_config);
319        let browser_context = fut.default_browser_context().clone();
320
321        let browser = Self {
322            sender: tx,
323            config: Some(config),
324            child: Some(child),
325            debug_ws_url,
326            browser_context,
327        };
328
329        Ok((browser, fut))
330    }
331
332    /// Request to fetch all existing browser targets.
333    ///
334    /// By default, only targets launched after the browser connection are tracked
335    /// when connecting to a existing browser instance with the devtools websocket url
336    /// This function fetches existing targets on the browser and adds them as pages internally
337    ///
338    /// The pages are not guaranteed to be ready as soon as the function returns
339    /// You should wait a few millis if you need to use a page
340    /// Returns [TargetInfo]
341    pub async fn fetch_targets(&mut self) -> Result<Vec<TargetInfo>> {
342        let (tx, rx) = oneshot_channel();
343
344        self.sender.send(HandlerMessage::FetchTargets(tx)).await?;
345
346        rx.await?
347    }
348
349    /// Request for the browser to close completely.
350    ///
351    /// If the browser was spawned by [`Browser::launch`], it is recommended to wait for the
352    /// spawned instance exit, to avoid "zombie" processes ([`Browser::wait`],
353    /// [`Browser::wait_sync`], [`Browser::try_wait`]).
354    /// [`Browser::drop`] waits automatically if needed.
355    pub async fn close(&self) -> Result<CloseReturns> {
356        let (tx, rx) = oneshot_channel();
357
358        self.sender.send(HandlerMessage::CloseBrowser(tx)).await?;
359
360        rx.await?
361    }
362
363    /// Asynchronously wait for the spawned chromium instance to exit completely.
364    ///
365    /// The instance is spawned by [`Browser::launch`]. `wait` is usually called after
366    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
367    /// "zombie" processes.
368    ///
369    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
370    /// connected to an existing browser through [`Browser::connect`])
371    pub async fn wait(&mut self) -> io::Result<Option<ExitStatus>> {
372        if let Some(child) = self.child.as_mut() {
373            Ok(Some(child.wait().await?))
374        } else {
375            Ok(None)
376        }
377    }
378
379    /// If the spawned chromium instance has completely exited, wait for it.
380    ///
381    /// The instance is spawned by [`Browser::launch`]. `try_wait` is usually called after
382    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
383    /// "zombie" processes.
384    ///
385    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
386    /// connected to an existing browser through [`Browser::connect`])
387    pub fn try_wait(&mut self) -> io::Result<Option<ExitStatus>> {
388        if let Some(child) = self.child.as_mut() {
389            child.try_wait()
390        } else {
391            Ok(None)
392        }
393    }
394
    /// Get mutable access to the spawned chromium instance.
    ///
    /// The instance is spawned by [`Browser::launch`]. The result is a [`async_process::Child`]
    /// value. It acts as a compat wrapper for an `async-std` or `tokio` child process.
    ///
    /// You may use [`async_process::Child::as_mut_inner`] to retrieve the concrete implementation
    /// for the selected runtime.
    ///
    /// Returns `None` if this [`Browser`] did not spawn any chromium instance (e.g.
    /// connected to an existing browser through [`Browser::connect`])
    pub fn get_mut_child(&mut self) -> Option<&mut Child> {
        self.child.as_mut()
    }
408
    /// Whether this [`Browser`] holds a spawned chromium process, i.e. it was
    /// created by [`Browser::launch`] rather than [`Browser::connect`].
    pub fn has_child(&self) -> bool {
        self.child.is_some()
    }
413
414    /// Forcibly kill the spawned chromium instance
415    ///
416    /// The instance is spawned by [`Browser::launch`]. `kill` will automatically wait for the child
417    /// process to exit to avoid "zombie" processes.
418    ///
419    /// This method is provided to help if the browser does not close by itself. You should prefer
420    /// to use [`Browser::close`].
421    ///
422    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
423    /// connected to an existing browser through [`Browser::connect`])
424    pub async fn kill(&mut self) -> Option<io::Result<()>> {
425        match self.child.as_mut() {
426            Some(child) => Some(child.kill().await),
427            None => None,
428        }
429    }
430
431    /// If not launched as incognito this creates a new incognito browser
432    /// context. After that this browser exists within the incognito session.
433    /// New pages created while being in incognito mode will also run in the
434    /// incognito context. Incognito contexts won't share cookies/cache with
435    /// other browser contexts.
436    pub async fn start_incognito_context(&mut self) -> Result<&mut Self> {
437        if !self.is_incognito_configured() {
438            let browser_context_id = self
439                .create_browser_context(CreateBrowserContextParams::default())
440                .await?;
441            self.browser_context = BrowserContext::from(browser_context_id);
442            self.sender
443                .send(HandlerMessage::InsertContext(self.browser_context.clone()))
444                .await?;
445        }
446
447        Ok(self)
448    }
449
450    /// If a incognito session was created with
451    /// `Browser::start_incognito_context` this disposes this context.
452    ///
453    /// # Note This will also dispose all pages that were running within the
454    /// incognito context.
455    pub async fn quit_incognito_context_base(
456        &self,
457        browser_context_id: BrowserContextId,
458    ) -> Result<&Self> {
459        self.dispose_browser_context(browser_context_id.clone())
460            .await?;
461        self.sender
462            .send(HandlerMessage::DisposeContext(BrowserContext::from(
463                browser_context_id,
464            )))
465            .await?;
466        Ok(self)
467    }
468
469    /// If a incognito session was created with
470    /// `Browser::start_incognito_context` this disposes this context.
471    ///
472    /// # Note This will also dispose all pages that were running within the
473    /// incognito context.
474    pub async fn quit_incognito_context(&mut self) -> Result<&mut Self> {
475        if let Some(id) = self.browser_context.take() {
476            let _ = self.quit_incognito_context_base(id).await;
477        }
478        Ok(self)
479    }
480
481    /// Whether incognito mode was configured from the start
482    fn is_incognito_configured(&self) -> bool {
483        self.config
484            .as_ref()
485            .map(|c| c.incognito)
486            .unwrap_or_default()
487    }
488
    /// Returns the address of the debug websocket this browser is attached to.
    pub fn websocket_address(&self) -> &String {
        &self.debug_ws_url
    }
493
494    /// Whether the BrowserContext is incognito.
495    pub fn is_incognito(&self) -> bool {
496        self.is_incognito_configured() || self.browser_context.is_incognito()
497    }
498
    /// The stored configuration of the chromium instance, if any.
    pub fn config(&self) -> Option<&BrowserConfig> {
        self.config.as_ref()
    }
503
504    /// Create a new browser page
505    pub async fn new_page(&self, params: impl Into<CreateTargetParams>) -> Result<Page> {
506        let (tx, rx) = oneshot_channel();
507        let mut params = params.into();
508
509        if let Some(id) = self.browser_context.id() {
510            if params.browser_context_id.is_none() {
511                params.browser_context_id = Some(id.clone());
512            }
513        }
514
515        let _ = self
516            .sender
517            .send(HandlerMessage::CreatePage(params, tx))
518            .await;
519
520        rx.await?
521    }
522
523    /// Version information about the browser
524    pub async fn version(&self) -> Result<GetVersionReturns> {
525        Ok(self.execute(GetVersionParams::default()).await?.result)
526    }
527
528    /// Returns the user agent of the browser
529    pub async fn user_agent(&self) -> Result<String> {
530        Ok(self.version().await?.user_agent)
531    }
532
533    /// Call a browser method.
534    pub async fn execute<T: Command>(&self, cmd: T) -> Result<CommandResponse<T::Response>> {
535        let (tx, rx) = oneshot_channel();
536        let method = cmd.identifier();
537        let msg = CommandMessage::new(cmd, tx)?;
538
539        self.sender.send(HandlerMessage::Command(msg)).await?;
540        let resp = rx.await??;
541        to_command_response::<T>(resp, method)
542    }
543
544    /// Set permission settings for given embedding and embedded origins.
545    /// [PermissionDescriptor](https://chromedevtools.github.io/devtools-protocol/tot/Browser/#type-PermissionDescriptor)
546    /// [PermissionSetting](https://chromedevtools.github.io/devtools-protocol/tot/Browser/#type-PermissionSetting)
547    pub async fn set_permission(
548        &self,
549        permission: PermissionDescriptor,
550        setting: PermissionSetting,
551        origin: Option<impl Into<String>>,
552        embedded_origin: Option<impl Into<String>>,
553        browser_context_id: Option<BrowserContextId>,
554    ) -> Result<&Self> {
555        self.execute(SetPermissionParams {
556            permission,
557            setting,
558            origin: origin.map(Into::into),
559            embedded_origin: embedded_origin.map(Into::into),
560            browser_context_id: browser_context_id.or_else(|| self.browser_context.id.clone()),
561        })
562        .await?;
563        Ok(self)
564    }
565
566    /// Convenience: set a permission for a single origin using the current browser context.
567    pub async fn set_permission_for_origin(
568        &self,
569        origin: impl Into<String>,
570        embedded_origin: Option<impl Into<String>>,
571        permission: PermissionDescriptor,
572        setting: PermissionSetting,
573    ) -> Result<&Self> {
574        self.set_permission(permission, setting, Some(origin), embedded_origin, None)
575            .await
576    }
577
578    /// "Reset" a permission override by setting it back to Prompt.
579    pub async fn reset_permission_for_origin(
580        &self,
581        origin: impl Into<String>,
582        embedded_origin: Option<impl Into<String>>,
583        permission: PermissionDescriptor,
584    ) -> Result<&Self> {
585        self.set_permission_for_origin(
586            origin,
587            embedded_origin,
588            permission,
589            PermissionSetting::Prompt,
590        )
591        .await
592    }
593
594    /// "Grant" all permissions.
595    pub async fn grant_all_permission_for_origin(
596        &self,
597        origin: impl Into<String>,
598        embedded_origin: Option<impl Into<String>>,
599        permission: PermissionDescriptor,
600    ) -> Result<&Self> {
601        self.set_permission_for_origin(
602            origin,
603            embedded_origin,
604            permission,
605            PermissionSetting::Granted,
606        )
607        .await
608    }
609
610    /// "Deny" all permissions.
611    pub async fn deny_all_permission_for_origin(
612        &self,
613        origin: impl Into<String>,
614        embedded_origin: Option<impl Into<String>>,
615        permission: PermissionDescriptor,
616    ) -> Result<&Self> {
617        self.set_permission_for_origin(
618            origin,
619            embedded_origin,
620            permission,
621            PermissionSetting::Denied,
622        )
623        .await
624    }
625
626    /// Return all of the pages of the browser
627    pub async fn pages(&self) -> Result<Vec<Page>> {
628        let (tx, rx) = oneshot_channel();
629        self.sender.send(HandlerMessage::GetPages(tx)).await?;
630        Ok(rx.await?)
631    }
632
633    /// Return page of given target_id
634    pub async fn get_page(&self, target_id: TargetId) -> Result<Page> {
635        let (tx, rx) = oneshot_channel();
636        self.sender
637            .send(HandlerMessage::GetPage(target_id, tx))
638            .await?;
639        rx.await?.ok_or(CdpError::NotFound)
640    }
641
642    /// Set listener for browser event
643    pub async fn event_listener<T: IntoEventKind>(&self) -> Result<EventStream<T>> {
644        let (tx, rx) = unbounded_channel();
645        self.sender
646            .send(HandlerMessage::AddEventListener(
647                EventListenerRequest::new::<T>(tx),
648            ))
649            .await?;
650
651        Ok(EventStream::new(rx))
652    }
653
654    /// Creates a new empty browser context.
655    pub async fn create_browser_context(
656        &mut self,
657        params: CreateBrowserContextParams,
658    ) -> Result<BrowserContextId> {
659        let response = self.execute(params).await?;
660
661        Ok(response.result.browser_context_id)
662    }
663
664    /// Returns all browser contexts created with Target.createBrowserContext method.
665    pub async fn get_browser_contexts(
666        &mut self,
667        params: GetBrowserContextsParams,
668    ) -> Result<GetBrowserContextsReturns> {
669        let response = self.execute(params).await?;
670        Ok(response.result)
671    }
672
673    /// Send a new empty browser context.
674    pub async fn send_new_context(
675        &mut self,
676        browser_context_id: BrowserContextId,
677    ) -> Result<&Self> {
678        self.browser_context = BrowserContext::from(browser_context_id);
679        self.sender
680            .send(HandlerMessage::InsertContext(self.browser_context.clone()))
681            .await?;
682        Ok(self)
683    }
684
685    /// Deletes a browser context.
686    pub async fn dispose_browser_context(
687        &self,
688        browser_context_id: impl Into<BrowserContextId>,
689    ) -> Result<&Self> {
690        self.execute(DisposeBrowserContextParams::new(browser_context_id))
691            .await?;
692
693        Ok(self)
694    }
695
696    /// Clears cookies.
697    pub async fn clear_cookies(&self) -> Result<&Self> {
698        self.execute(ClearCookiesParams::default()).await?;
699        Ok(self)
700    }
701
702    /// Returns all browser cookies.
703    pub async fn get_cookies(&self) -> Result<Vec<Cookie>> {
704        let cmd = GetCookiesParams {
705            browser_context_id: self.browser_context.id.clone(),
706        };
707
708        Ok(self.execute(cmd).await?.result.cookies)
709    }
710
711    /// Sets given cookies.
712    pub async fn set_cookies(&self, mut cookies: Vec<CookieParam>) -> Result<&Self> {
713        for cookie in &mut cookies {
714            if let Some(url) = cookie.url.as_ref() {
715                crate::page::validate_cookie_url(url)?;
716            }
717        }
718
719        let mut cookies_param = SetCookiesParams::new(cookies);
720
721        cookies_param.browser_context_id = self.browser_context.id.clone();
722
723        self.execute(cookies_param).await?;
724        Ok(self)
725    }
726}
727
728impl Drop for Browser {
729    fn drop(&mut self) {
730        if let Some(child) = self.child.as_mut() {
731            if let Ok(Some(_)) = child.try_wait() {
732                // Already exited, do nothing. Usually occurs after using the method close or kill.
733            } else {
734                // We set the `kill_on_drop` property for the child process, so no need to explicitely
735                // kill it here. It can't really be done anyway since the method is async.
736                //
737                // On Unix, the process will be reaped in the background by the runtime automatically
738                // so it won't leave any resources locked. It is, however, a better practice for the user to
739                // do it himself since the runtime doesn't provide garantees as to when the reap occurs, so we
740                // warn him here.
741                tracing::warn!("Browser was not closed manually, it will be killed automatically in the background");
742            }
743        }
744    }
745}
746
747/// Resolve devtools WebSocket URL from the provided browser process
748///
749/// If an error occurs, it returns the browser's stderr output.
750///
751/// The URL resolution fails if:
752/// - [`CdpError::LaunchTimeout`]: `timeout_fut` completes, this corresponds to a timeout
753/// - [`CdpError::LaunchExit`]: the browser process exits (or is killed)
754/// - [`CdpError::LaunchIo`]: an input/output error occurs when await the process exit or reading
755///   the browser's stderr: end of stream, invalid UTF-8, other
756async fn ws_url_from_output(
757    child_process: &mut Child,
758    timeout_fut: impl Future<Output = ()> + Unpin,
759) -> Result<String> {
760    use tokio::io::AsyncBufReadExt;
761    let stderr = match child_process.stderr.take() {
762        Some(stderr) => stderr,
763        None => {
764            return Err(CdpError::LaunchIo(
765                io::Error::new(io::ErrorKind::NotFound, "browser process has no stderr"),
766                BrowserStderr::new(Vec::new()),
767            ));
768        }
769    };
770    let mut stderr_bytes = Vec::<u8>::new();
771    let mut buf = tokio::io::BufReader::new(stderr);
772    let mut timeout_fut = timeout_fut;
773    loop {
774        tokio::select! {
775            _ = &mut timeout_fut => return Err(CdpError::LaunchTimeout(BrowserStderr::new(stderr_bytes))),
776            exit_status = child_process.wait() => {
777                return Err(match exit_status {
778                    Err(e) => CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)),
779                    Ok(exit_status) => CdpError::LaunchExit(exit_status, BrowserStderr::new(stderr_bytes)),
780                })
781            },
782            read_res = buf.read_until(b'\n', &mut stderr_bytes) => {
783                match read_res {
784                    Err(e) => return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes))),
785                    Ok(byte_count) => {
786                        if byte_count == 0 {
787                            let e = io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected end of stream");
788                            return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
789                        }
790                        let start_offset = stderr_bytes.len() - byte_count;
791                        let new_bytes = &stderr_bytes[start_offset..];
792                        match std::str::from_utf8(new_bytes) {
793                            Err(_) => {
794                                let e = io::Error::new(io::ErrorKind::InvalidData, "stream did not contain valid UTF-8");
795                                return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
796                            }
797                            Ok(line) => {
798                                if let Some((_, ws)) = line.rsplit_once("listening on ") {
799                                    if ws.starts_with("ws") && ws.contains("devtools/browser") {
800                                        return Ok(ws.trim().to_string());
801                                    }
802                                }
803                            }
804                        }
805                    }
806                }
807            }
808        }
809    }
810}
811
/// Selects whether, and in which flavour, the browser is started headless.
///
/// The default is [`HeadlessMode::True`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum HeadlessMode {
    /// Run with a visible window ("headful").
    False,
    /// Run in the old headless mode.
    #[default]
    True,
    /// Run in the new headless mode.
    ///
    /// See also: <https://developer.chrome.com/docs/chromium/new-headless>
    New,
}
822
823#[derive(Debug, Clone, Default)]
824pub struct BrowserConfig {
825    /// Determines whether to run headless version of the browser. Defaults to
826    /// true.
827    headless: HeadlessMode,
828    /// Determines whether to run the browser with a sandbox.
829    sandbox: bool,
830    /// Launch the browser with a specific window width and height.
831    window_size: Option<(u32, u32)>,
832    /// Launch the browser with a specific debugging port.
833    port: u16,
834    /// Path for Chrome or Chromium.
835    ///
836    /// If unspecified, the create will try to automatically detect a suitable
837    /// binary.
838    executable: std::path::PathBuf,
839
840    /// A list of Chrome extensions to load.
841    ///
842    /// An extension should be a path to a folder containing the extension code.
843    /// CRX files cannot be used directly and must be first extracted.
844    ///
845    /// Note that Chrome does not support loading extensions in headless-mode.
846    /// See https://bugs.chromium.org/p/chromium/issues/detail?id=706008#c5
847    extensions: Vec<String>,
848
849    /// Environment variables to set for the Chromium process.
850    /// Passes value through to std::process::Command::envs.
851    pub process_envs: Option<HashMap<String, String>>,
852
853    /// Data dir for user data
854    pub user_data_dir: Option<PathBuf>,
855
856    /// Whether to launch the `Browser` in incognito mode.
857    incognito: bool,
858
859    /// Timeout duration for `Browser::launch`.
860    launch_timeout: Duration,
861
862    /// Ignore https errors, default is true.
863    ignore_https_errors: bool,
864    pub viewport: Option<Viewport>,
865    /// The duration after a request with no response should time out.
866    request_timeout: Duration,
867
868    /// Additional command line arguments to pass to the browser instance.
869    args: Vec<String>,
870
871    /// Whether to disable DEFAULT_ARGS or not, default is false.
872    disable_default_args: bool,
873
874    /// Whether to enable request interception.
875    pub request_intercept: bool,
876
877    /// Whether to enable cache.
878    pub cache_enabled: bool,
879    /// Whether to enable or disable Service Workers.
880    /// Disabling may reduce background network activity and caching effects.
881    pub service_worker_enabled: bool,
882    /// Whether to ignore image/visual requests during interception.
883    /// Can reduce bandwidth and speed up crawling when visuals are unnecessary.
884    pub ignore_visuals: bool,
885    /// Whether to ignore stylesheet (CSS) requests during interception.
886    /// Useful for content-only crawls.
887    pub ignore_stylesheets: bool,
888    /// Whether to ignore JavaScript requests during interception.
889    /// This still allows critical framework bundles to pass when applicable.
890    pub ignore_javascript: bool,
891    /// Whether to ignore analytics/telemetry requests during interception.
892    pub ignore_analytics: bool,
893    /// Ignore prefetch request.
894    pub ignore_prefetch: bool,
895    /// Whether to ignore ad network requests during interception.
896    pub ignore_ads: bool,
897    /// Extra headers.
898    pub extra_headers: Option<std::collections::HashMap<String, String>>,
899    /// Only html
900    pub only_html: bool,
901    /// The interception intercept manager.
902    pub intercept_manager: NetworkInterceptManager,
903    /// The max bytes to receive.
904    pub max_bytes_allowed: Option<u64>,
905    /// Cap on Document-type redirect hops before the navigation is aborted.
906    /// `None` disables enforcement; `Some(n)` mirrors `reqwest::redirect::Policy::limited(n)`.
907    pub max_redirects: Option<usize>,
908    /// Cap on main-frame cross-document navigations per `goto`. Defends against
909    /// JS / meta-refresh loops that bypass the HTTP redirect guard. `None`
910    /// disables the guard.
911    pub max_main_frame_navigations: Option<u32>,
912    /// Whitelist patterns to allow through the network.
913    pub whitelist_patterns: Option<Vec<String>>,
914    /// Blacklist patterns to block through the network.
915    pub blacklist_patterns: Option<Vec<String>>,
916    /// Extra ABP/uBO filter rules to load into the adblock engine (requires `adblock` feature).
917    /// These are merged with the built-in `ADBLOCK_PATTERNS` for richer blocking
918    /// (e.g. EasyList / EasyPrivacy content).
919    #[cfg(feature = "adblock")]
920    pub adblock_filter_rules: Option<Vec<String>>,
921    /// Capacity of the channel between browser handle and handler.
922    /// Defaults to 1000.
923    pub channel_capacity: usize,
924    /// Capacity of the per-page mpsc channel carrying `TargetMessage`s
925    /// from each `Page` to the handler. Defaults to 2048; override via
926    /// `page_channel_capacity(N)` on the builder. Values of `0` are
927    /// clamped to `1` at channel creation.
928    pub page_channel_capacity: usize,
929    /// Number of WebSocket connection retry attempts with exponential backoff.
930    /// Defaults to 4.
931    pub connection_retries: u32,
932}
933
934#[derive(Debug, Clone)]
935pub struct BrowserConfigBuilder {
936    /// Headless mode configuration for the browser.
937    headless: HeadlessMode,
938    /// Whether to run the browser with a sandbox.
939    sandbox: bool,
940    /// Optional initial browser window size `(width, height)`.
941    window_size: Option<(u32, u32)>,
942    /// DevTools debugging port to bind to.
943    port: u16,
944    /// Optional explicit path to the Chrome/Chromium executable.
945    /// If `None`, auto-detection may be attempted based on `executation_detection`.
946    executable: Option<PathBuf>,
947    /// Controls auto-detection behavior for finding a Chrome/Chromium binary.
948    executation_detection: DetectionOptions,
949    /// List of unpacked extensions (directories) to load at startup.
950    extensions: Vec<String>,
951    /// Environment variables to set on the spawned Chromium process.
952    process_envs: Option<HashMap<String, String>>,
953    /// User data directory to persist browser state, or `None` for ephemeral.
954    user_data_dir: Option<PathBuf>,
955    /// Whether to start the browser in incognito (off-the-record) mode.
956    incognito: bool,
957    /// Maximum time to wait for the browser to launch and become ready.
958    launch_timeout: Duration,
959    /// Whether to ignore HTTPS/TLS errors during navigation and requests.
960    ignore_https_errors: bool,
961    /// Default page viewport configuration applied on startup.
962    viewport: Option<Viewport>,
963    /// Timeout for individual network requests without response progress.
964    request_timeout: Duration,
965    /// Additional command-line flags passed directly to the browser process.
966    args: Vec<String>,
967    /// Disable the default argument set and use only the provided `args`.
968    disable_default_args: bool,
969    /// Enable Network.requestInterception for request filtering/handling.
970    request_intercept: bool,
971    /// Enable the browser cache for navigations and subresources.
972    cache_enabled: bool,
973    /// Enable/disable Service Workers.
974    service_worker_enabled: bool,
975    /// Drop image/visual requests when interception is enabled.
976    ignore_visuals: bool,
977    /// Drop ad network requests when interception is enabled.
978    ignore_ads: bool,
979    /// Drop JavaScript requests when interception is enabled.
980    ignore_javascript: bool,
981    /// Drop stylesheet (CSS) requests when interception is enabled.
982    ignore_stylesheets: bool,
983    /// Ignore prefetch domains.
984    ignore_prefetch: bool,
985    /// Drop analytics/telemetry requests when interception is enabled.
986    ignore_analytics: bool,
987    /// If `true`, limit fetching to HTML documents.
988    only_html: bool,
989    /// Extra HTTP headers to include with every request.
990    extra_headers: Option<std::collections::HashMap<String, String>>,
991    /// Network interception manager used to configure filtering behavior.
992    intercept_manager: NetworkInterceptManager,
993    /// Optional upper bound on bytes that may be received (per session/run).
994    max_bytes_allowed: Option<u64>,
995    /// Optional cap on Document redirect hops per navigation (`None` = disabled).
996    max_redirects: Option<usize>,
997    /// Optional cap on main-frame cross-document navigations per goto.
998    max_main_frame_navigations: Option<u32>,
999    /// Whitelist patterns to allow through the network.
1000    whitelist_patterns: Option<Vec<String>>,
1001    /// Blacklist patterns to block through the network.
1002    blacklist_patterns: Option<Vec<String>>,
1003    /// Extra ABP/uBO filter rules for the adblock engine.
1004    #[cfg(feature = "adblock")]
1005    adblock_filter_rules: Option<Vec<String>>,
1006    /// Capacity of the channel between browser handle and handler.
1007    channel_capacity: usize,
1008    /// Capacity of the per-page mpsc `TargetMessage` channel.
1009    page_channel_capacity: usize,
1010    /// Number of WebSocket connection retry attempts.
1011    connection_retries: u32,
1012}
1013
1014impl BrowserConfig {
1015    /// Browser builder default config.
1016    pub fn builder() -> BrowserConfigBuilder {
1017        BrowserConfigBuilder::default()
1018    }
1019
1020    /// Launch with the executable path.
1021    pub fn with_executable(path: impl AsRef<Path>) -> Self {
1022        // SAFETY: build() only fails when no executable is provided,
1023        // but we always provide one via chrome_executable().
1024        Self::builder().chrome_executable(path).build().unwrap()
1025    }
1026}
1027
1028impl Default for BrowserConfigBuilder {
1029    fn default() -> Self {
1030        Self {
1031            headless: HeadlessMode::True,
1032            sandbox: true,
1033            window_size: None,
1034            port: 0,
1035            executable: None,
1036            executation_detection: DetectionOptions::default(),
1037            extensions: Vec::new(),
1038            process_envs: None,
1039            user_data_dir: None,
1040            incognito: false,
1041            launch_timeout: Duration::from_millis(LAUNCH_TIMEOUT),
1042            ignore_https_errors: true,
1043            viewport: Some(Default::default()),
1044            request_timeout: Duration::from_millis(REQUEST_TIMEOUT),
1045            args: Vec::new(),
1046            disable_default_args: false,
1047            request_intercept: false,
1048            cache_enabled: true,
1049            ignore_visuals: false,
1050            ignore_ads: false,
1051            ignore_javascript: false,
1052            ignore_analytics: false,
1053            ignore_stylesheets: false,
1054            ignore_prefetch: true,
1055            only_html: false,
1056            extra_headers: Default::default(),
1057            service_worker_enabled: true,
1058            intercept_manager: NetworkInterceptManager::Unknown,
1059            max_bytes_allowed: None,
1060            max_redirects: None,
1061            max_main_frame_navigations: None,
1062            whitelist_patterns: None,
1063            blacklist_patterns: None,
1064            #[cfg(feature = "adblock")]
1065            adblock_filter_rules: None,
1066            channel_capacity: 4096,
1067            page_channel_capacity: crate::handler::page::DEFAULT_PAGE_CHANNEL_CAPACITY,
1068            connection_retries: crate::conn::DEFAULT_CONNECTION_RETRIES,
1069        }
1070    }
1071}
1072
1073impl BrowserConfigBuilder {
1074    /// Configure window size.
1075    pub fn window_size(mut self, width: u32, height: u32) -> Self {
1076        self.window_size = Some((width, height));
1077        self
1078    }
1079    /// Configure sandboxing.
1080    pub fn no_sandbox(mut self) -> Self {
1081        self.sandbox = false;
1082        self
1083    }
1084    /// Configure the launch to start non headless.
1085    pub fn with_head(mut self) -> Self {
1086        self.headless = HeadlessMode::False;
1087        self
1088    }
1089    /// Configure the launch with the new headless mode.
1090    pub fn new_headless_mode(mut self) -> Self {
1091        self.headless = HeadlessMode::New;
1092        self
1093    }
1094    /// Configure the launch with headless.
1095    pub fn headless_mode(mut self, mode: HeadlessMode) -> Self {
1096        self.headless = mode;
1097        self
1098    }
1099    /// Configure the launch in incognito.
1100    pub fn incognito(mut self) -> Self {
1101        self.incognito = true;
1102        self
1103    }
1104
1105    pub fn respect_https_errors(mut self) -> Self {
1106        self.ignore_https_errors = false;
1107        self
1108    }
1109
1110    pub fn port(mut self, port: u16) -> Self {
1111        self.port = port;
1112        self
1113    }
1114
1115    pub fn with_max_bytes_allowed(mut self, max_bytes_allowed: Option<u64>) -> Self {
1116        self.max_bytes_allowed = max_bytes_allowed;
1117        self
1118    }
1119
1120    /// Cap the number of Document-type redirect hops per navigation.
1121    ///
1122    /// `None` disables enforcement (default, preserves Chromium's own ~20-hop cap).
1123    /// `Some(n)` aborts once a navigation chain exceeds `n` by emitting
1124    /// `net::ERR_TOO_MANY_REDIRECTS` and calling `Page.stopLoading`.
1125    pub fn with_max_redirects(mut self, max_redirects: Option<usize>) -> Self {
1126        self.max_redirects = max_redirects;
1127        self
1128    }
1129
1130    /// Cap the number of main-frame cross-document navigations allowed per
1131    /// `goto` call.
1132    ///
1133    /// Defends against JS `location.href` / meta-refresh loops that bypass
1134    /// HTTP-level redirect detection — each hop looks like a fresh document
1135    /// to Chromium, so `with_max_redirects` alone cannot catch them. `None`
1136    /// disables the guard (default).
1137    pub fn with_max_main_frame_navigations(mut self, cap: Option<u32>) -> Self {
1138        self.max_main_frame_navigations = cap;
1139        self
1140    }
1141
1142    pub fn launch_timeout(mut self, timeout: Duration) -> Self {
1143        self.launch_timeout = timeout;
1144        self
1145    }
1146
1147    pub fn request_timeout(mut self, timeout: Duration) -> Self {
1148        self.request_timeout = timeout;
1149        self
1150    }
1151
1152    /// Configures the viewport of the browser, which defaults to `800x600`.
1153    /// `None` disables viewport emulation (i.e., it uses the browsers default
1154    /// configuration, which fills the available space. This is similar to what
1155    /// Playwright does when you provide `null` as the value of its `viewport`
1156    /// option).
1157    pub fn viewport(mut self, viewport: impl Into<Option<Viewport>>) -> Self {
1158        self.viewport = viewport.into();
1159        self
1160    }
1161
1162    pub fn user_data_dir(mut self, data_dir: impl AsRef<Path>) -> Self {
1163        self.user_data_dir = Some(data_dir.as_ref().to_path_buf());
1164        self
1165    }
1166
1167    pub fn chrome_executable(mut self, path: impl AsRef<Path>) -> Self {
1168        self.executable = Some(path.as_ref().to_path_buf());
1169        self
1170    }
1171
1172    pub fn chrome_detection(mut self, options: DetectionOptions) -> Self {
1173        self.executation_detection = options;
1174        self
1175    }
1176
1177    pub fn extension(mut self, extension: impl Into<String>) -> Self {
1178        self.extensions.push(extension.into());
1179        self
1180    }
1181
1182    pub fn extensions<I, S>(mut self, extensions: I) -> Self
1183    where
1184        I: IntoIterator<Item = S>,
1185        S: Into<String>,
1186    {
1187        for ext in extensions {
1188            self.extensions.push(ext.into());
1189        }
1190        self
1191    }
1192
1193    pub fn env(mut self, key: impl Into<String>, val: impl Into<String>) -> Self {
1194        self.process_envs
1195            .get_or_insert(HashMap::new())
1196            .insert(key.into(), val.into());
1197        self
1198    }
1199
1200    pub fn envs<I, K, V>(mut self, envs: I) -> Self
1201    where
1202        I: IntoIterator<Item = (K, V)>,
1203        K: Into<String>,
1204        V: Into<String>,
1205    {
1206        self.process_envs
1207            .get_or_insert(HashMap::new())
1208            .extend(envs.into_iter().map(|(k, v)| (k.into(), v.into())));
1209        self
1210    }
1211
1212    pub fn arg(mut self, arg: impl Into<String>) -> Self {
1213        self.args.push(arg.into());
1214        self
1215    }
1216
1217    pub fn args<I, S>(mut self, args: I) -> Self
1218    where
1219        I: IntoIterator<Item = S>,
1220        S: Into<String>,
1221    {
1222        for arg in args {
1223            self.args.push(arg.into());
1224        }
1225        self
1226    }
1227
1228    pub fn disable_default_args(mut self) -> Self {
1229        self.disable_default_args = true;
1230        self
1231    }
1232
1233    pub fn enable_request_intercept(mut self) -> Self {
1234        self.request_intercept = true;
1235        self
1236    }
1237
1238    pub fn disable_request_intercept(mut self) -> Self {
1239        self.request_intercept = false;
1240        self
1241    }
1242
1243    pub fn enable_cache(mut self) -> Self {
1244        self.cache_enabled = true;
1245        self
1246    }
1247
1248    pub fn disable_cache(mut self) -> Self {
1249        self.cache_enabled = false;
1250        self
1251    }
1252
1253    /// Set service worker enabled.
1254    pub fn set_service_worker_enabled(mut self, bypass: bool) -> Self {
1255        self.service_worker_enabled = bypass;
1256        self
1257    }
1258
1259    /// Set extra request headers.
1260    pub fn set_extra_headers(
1261        mut self,
1262        headers: Option<std::collections::HashMap<String, String>>,
1263    ) -> Self {
1264        self.extra_headers = headers;
1265        self
1266    }
1267
1268    /// Set whitelist patterns to allow through network interception allowing.
1269    pub fn set_whitelist_patterns(mut self, whitelist_patterns: Option<Vec<String>>) -> Self {
1270        self.whitelist_patterns = whitelist_patterns;
1271        self
1272    }
1273
1274    /// Set blacklist patterns to block through network interception.
1275    pub fn set_blacklist_patterns(mut self, blacklist_patterns: Option<Vec<String>>) -> Self {
1276        self.blacklist_patterns = blacklist_patterns;
1277        self
1278    }
1279
1280    /// Set extra ABP/uBO filter rules for the adblock engine.
1281    /// Pass EasyList/EasyPrivacy content lines for richer blocking coverage.
1282    #[cfg(feature = "adblock")]
1283    pub fn set_adblock_filter_rules(mut self, rules: Vec<String>) -> Self {
1284        self.adblock_filter_rules = Some(rules);
1285        self
1286    }
1287
1288    /// Set the capacity of the channel between browser handle and handler.
1289    /// Defaults to 1000.
1290    pub fn channel_capacity(mut self, capacity: usize) -> Self {
1291        self.channel_capacity = capacity;
1292        self
1293    }
1294
1295    /// Set the capacity of the per-page mpsc channel carrying
1296    /// `TargetMessage`s from each `Page` to the handler.
1297    ///
1298    /// Defaults to 2048 (the previous hard-coded value). Tune upward to
1299    /// absorb bursts of commands without pushing them onto the
1300    /// `CommandFuture` async-send fallback path; tune downward to apply
1301    /// back-pressure sooner. Values of `0` are clamped to `1` at channel
1302    /// creation time (tokio panics on a zero-capacity mpsc).
1303    pub fn page_channel_capacity(mut self, capacity: usize) -> Self {
1304        self.page_channel_capacity = capacity;
1305        self
1306    }
1307
1308    /// Set the number of WebSocket connection retry attempts with exponential backoff.
1309    /// Defaults to 4. Set to 0 for a single attempt with no retries.
1310    pub fn connection_retries(mut self, retries: u32) -> Self {
1311        self.connection_retries = retries;
1312        self
1313    }
1314
1315    /// Build the browser.
1316    pub fn build(self) -> std::result::Result<BrowserConfig, String> {
1317        let executable = if let Some(e) = self.executable {
1318            e
1319        } else {
1320            detection::default_executable(self.executation_detection)?
1321        };
1322
1323        Ok(BrowserConfig {
1324            headless: self.headless,
1325            sandbox: self.sandbox,
1326            window_size: self.window_size,
1327            port: self.port,
1328            executable,
1329            extensions: self.extensions,
1330            process_envs: self.process_envs,
1331            user_data_dir: self.user_data_dir,
1332            incognito: self.incognito,
1333            launch_timeout: self.launch_timeout,
1334            ignore_https_errors: self.ignore_https_errors,
1335            viewport: self.viewport,
1336            request_timeout: self.request_timeout,
1337            args: self.args,
1338            disable_default_args: self.disable_default_args,
1339            request_intercept: self.request_intercept,
1340            cache_enabled: self.cache_enabled,
1341            ignore_visuals: self.ignore_visuals,
1342            ignore_ads: self.ignore_ads,
1343            ignore_javascript: self.ignore_javascript,
1344            ignore_analytics: self.ignore_analytics,
1345            ignore_stylesheets: self.ignore_stylesheets,
1346            ignore_prefetch: self.ignore_prefetch,
1347            extra_headers: self.extra_headers,
1348            only_html: self.only_html,
1349            intercept_manager: self.intercept_manager,
1350            service_worker_enabled: self.service_worker_enabled,
1351            max_bytes_allowed: self.max_bytes_allowed,
1352            max_redirects: self.max_redirects,
1353            max_main_frame_navigations: self.max_main_frame_navigations,
1354            whitelist_patterns: self.whitelist_patterns,
1355            blacklist_patterns: self.blacklist_patterns,
1356            #[cfg(feature = "adblock")]
1357            adblock_filter_rules: self.adblock_filter_rules,
1358            channel_capacity: self.channel_capacity,
1359            page_channel_capacity: self.page_channel_capacity,
1360            connection_retries: self.connection_retries,
1361        })
1362    }
1363}
1364
1365impl BrowserConfig {
1366    pub fn launch(&self) -> io::Result<Child> {
1367        let mut cmd = async_process::Command::new(&self.executable);
1368
1369        if self.disable_default_args {
1370            cmd.args(&self.args);
1371        } else {
1372            cmd.args(DEFAULT_ARGS).args(&self.args);
1373        }
1374
1375        if !self
1376            .args
1377            .iter()
1378            .any(|arg| arg.contains("--remote-debugging-port="))
1379        {
1380            cmd.arg(format!("--remote-debugging-port={}", self.port));
1381        }
1382
1383        cmd.args(
1384            self.extensions
1385                .iter()
1386                .map(|e| format!("--load-extension={e}")),
1387        );
1388
1389        if let Some(ref user_data) = self.user_data_dir {
1390            cmd.arg(format!("--user-data-dir={}", user_data.display()));
1391        } else {
1392            // If the user did not specify a data directory, this would default to the systems default
1393            // data directory. In most cases, we would rather have a fresh instance of Chromium. Specify
1394            // a temp dir just for chromiumoxide instead.
1395            cmd.arg(format!(
1396                "--user-data-dir={}",
1397                std::env::temp_dir().join("chromiumoxide-runner").display()
1398            ));
1399        }
1400
1401        if let Some((width, height)) = self.window_size {
1402            cmd.arg(format!("--window-size={width},{height}"));
1403        }
1404
1405        if !self.sandbox {
1406            cmd.args(["--no-sandbox", "--disable-setuid-sandbox"]);
1407        }
1408
1409        match self.headless {
1410            HeadlessMode::False => (),
1411            HeadlessMode::True => {
1412                cmd.args(["--headless", "--hide-scrollbars", "--mute-audio"]);
1413            }
1414            HeadlessMode::New => {
1415                cmd.args(["--headless=new", "--hide-scrollbars", "--mute-audio"]);
1416            }
1417        }
1418
1419        if self.incognito {
1420            cmd.arg("--incognito");
1421        }
1422
1423        if let Some(ref envs) = self.process_envs {
1424            cmd.envs(envs);
1425        }
1426        cmd.stderr(Stdio::piped()).spawn()
1427    }
1428}
1429
1430/// Returns the path to Chrome's executable.
1431///
1432/// If the `CHROME` environment variable is set, `default_executable` will
1433/// use it as the default path. Otherwise, the filenames `google-chrome-stable`
1434/// `chromium`, `chromium-browser`, `chrome` and `chrome-browser` are
1435/// searched for in standard places. If that fails,
1436/// `/Applications/Google Chrome.app/...` (on MacOS) or the registry (on
1437/// Windows) is consulted. If all of the above fail, an error is returned.
1438#[deprecated(note = "Use detection::default_executable instead")]
1439pub fn default_executable() -> Result<std::path::PathBuf, String> {
1440    let options = DetectionOptions {
1441        msedge: false,
1442        unstable: false,
1443    };
1444    detection::default_executable(options)
1445}
1446
/// Command-line switches handed to the Chrome binary unless default arguments
/// are disabled in the configuration.
///
/// Via https://github.com/puppeteer/puppeteer/blob/4846b8723cf20d3551c0d755df394cc5e0c82a94/src/node/Launcher.ts#L157
///
/// The order of the entries is the order in which they appear on the command
/// line, so it is kept stable.
static DEFAULT_ARGS: [&str; 26] = [
    // Background activity, throttling and crash reporting.
    "--disable-background-networking",
    "--enable-features=NetworkService,NetworkServiceInProcess",
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-breakpad",
    "--disable-client-side-phishing-detection",
    "--disable-component-extensions-with-background-pages",
    // Default apps, extensions and UI helpers.
    "--disable-default-apps",
    "--disable-dev-shm-usage",
    "--disable-extensions",
    "--disable-features=TranslateUI",
    "--disable-hang-monitor",
    "--disable-ipc-flooding-protection",
    "--disable-popup-blocking",
    "--disable-prompt-on-repost",
    "--disable-renderer-backgrounding",
    "--disable-sync",
    // Rendering, metrics and first-run behavior.
    "--force-color-profile=srgb",
    "--metrics-recording-only",
    "--no-first-run",
    // Automation, credential storage, Blink features and locale.
    "--enable-automation",
    "--password-store=basic",
    "--use-mock-keychain",
    "--enable-blink-features=IdleDetection",
    "--lang=en_US",
    "--disable-blink-features=AutomationControlled",
];