Skip to main content

chromiumoxide/
browser.rs

1use hashbrown::HashMap;
2use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE};
3use std::future::Future;
4use std::time::Duration;
5use std::{
6    io,
7    path::{Path, PathBuf},
8};
9
10use tokio::sync::mpsc::{channel, unbounded_channel, Sender};
11use tokio::sync::oneshot::channel as oneshot_channel;
12
13use crate::async_process::{self, Child, ExitStatus, Stdio};
14use crate::cmd::{to_command_response, CommandMessage};
15use crate::conn::Connection;
16use crate::detection::{self, DetectionOptions};
17use crate::error::{BrowserStderr, CdpError, Result};
18use crate::handler::browser::BrowserContext;
19use crate::handler::viewport::Viewport;
20use crate::handler::{Handler, HandlerConfig, HandlerMessage, REQUEST_TIMEOUT};
21use crate::listeners::{EventListenerRequest, EventStream};
22use crate::page::Page;
23use crate::utils;
24use chromiumoxide_cdp::cdp::browser_protocol::browser::{
25    BrowserContextId, CloseReturns, GetVersionParams, GetVersionReturns,
26};
27use chromiumoxide_cdp::cdp::browser_protocol::browser::{
28    PermissionDescriptor, PermissionSetting, SetPermissionParams,
29};
30use chromiumoxide_cdp::cdp::browser_protocol::network::{Cookie, CookieParam};
31use chromiumoxide_cdp::cdp::browser_protocol::storage::{
32    ClearCookiesParams, GetCookiesParams, SetCookiesParams,
33};
34use chromiumoxide_cdp::cdp::browser_protocol::target::{
35    CreateBrowserContextParams, CreateTargetParams, DisposeBrowserContextParams,
36    GetBrowserContextsParams, GetBrowserContextsReturns, TargetId, TargetInfo,
37};
38
39use chromiumoxide_cdp::cdp::{CdpEventMessage, IntoEventKind};
40use chromiumoxide_types::*;
41use spider_network_blocker::intercept_manager::NetworkInterceptManager;
42
/// Default `Browser::launch` timeout in milliseconds (20 seconds).
pub const LAUNCH_TIMEOUT: u64 = 20_000;
45
46lazy_static::lazy_static! {
47    /// The request client to get the web socket url.
48    static ref REQUEST_CLIENT: reqwest::Client = reqwest::Client::builder()
49        .timeout(Duration::from_secs(60))
50        .default_headers({
51            let mut m = HeaderMap::new();
52
53            m.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
54
55            m
56        })
57        .tcp_keepalive(Some(Duration::from_secs(5)))
58        .pool_idle_timeout(Some(Duration::from_secs(60)))
59        .pool_max_idle_per_host(10)
60        .build()
61        .expect("client to build");
62}
63
/// Returns chromey's global `reqwest::Client` for reuse by other subsystems
/// (e.g. remote cache uploads via `spider_remote_cache`).
///
/// This is the same lazily-initialized client that is used to query the
/// `json/version` endpoint when connecting to a browser over http(s).
pub fn request_client() -> &'static reqwest::Client {
    &REQUEST_CLIENT
}
69
/// A [`Browser`] is created when chromiumoxide connects to a Chromium instance.
///
/// It is returned, together with the [`Handler`] that owns the connection, by
/// [`Browser::connect`], [`Browser::connect_with_config`] and [`Browser::launch`].
/// All requests are forwarded to that handler through `sender`.
#[derive(Debug)]
pub struct Browser {
    /// The `Sender` to send messages to the connection handler that drives the
    /// websocket
    pub(crate) sender: Sender<HandlerMessage>,
    /// How the chromium instance was configured: the launch configuration for a
    /// spawned instance, or a configuration derived from the `HandlerConfig`
    /// when connecting to an existing one
    config: Option<BrowserConfig>,
    /// The spawned chromium instance (`None` when connected to an existing browser)
    child: Option<Child>,
    /// The debug web socket url of the chromium instance
    debug_ws_url: String,
    /// The context of the browser
    pub browser_context: BrowserContext,
}
85
/// Browser connection information.
///
/// This is the shape the response body of the `json/version` endpoint is
/// deserialized into when connecting through a http(s) url.
#[derive(serde::Deserialize, Debug, Default)]
pub struct BrowserConnection {
    #[serde(rename = "Browser")]
    /// The browser name
    pub browser: String,
    #[serde(rename = "Protocol-Version")]
    /// The protocol version reported by the browser
    pub protocol_version: String,
    #[serde(rename = "User-Agent")]
    /// User Agent used by default.
    pub user_agent: String,
    #[serde(rename = "V8-Version")]
    /// The v8 engine version
    pub v8_version: String,
    #[serde(rename = "WebKit-Version")]
    /// Webkit version
    pub webkit_version: String,
    #[serde(rename = "webSocketDebuggerUrl")]
    /// Remote debugging address (the web socket url to attach to)
    pub web_socket_debugger_url: String,
}
108
109impl Browser {
110    /// Connect to an already running chromium instance via the given URL.
111    ///
112    /// If the URL is a http(s) URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
113    pub async fn connect(url: impl Into<String>) -> Result<(Self, Handler)> {
114        Self::connect_with_config(url, HandlerConfig::default()).await
115    }
116
117    // Connect to an already running chromium instance with a given `HandlerConfig`.
118    ///
119    /// If the URL is a http URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
120    pub async fn connect_with_config(
121        url: impl Into<String>,
122        config: HandlerConfig,
123    ) -> Result<(Self, Handler)> {
124        let mut debug_ws_url = url.into();
125        let retries = config.connection_retries;
126
127        if debug_ws_url.starts_with("http") {
128            let version_url = if debug_ws_url.ends_with("/json/version")
129                || debug_ws_url.ends_with("/json/version/")
130            {
131                debug_ws_url.to_owned()
132            } else {
133                format!(
134                    "{}{}json/version",
135                    &debug_ws_url,
136                    if debug_ws_url.ends_with('/') { "" } else { "/" }
137                )
138            };
139
140            let mut discovered = false;
141
142            for attempt in 0..=retries {
143                let retry = || async {
144                    if attempt < retries {
145                        let backoff_ms = 50u64 * 3u64.saturating_pow(attempt);
146                        tokio::time::sleep(Duration::from_millis(backoff_ms)).await;
147                    }
148                };
149
150                match REQUEST_CLIENT.get(&version_url).send().await {
151                    Ok(req) => match req.bytes().await {
152                        Ok(b) => {
153                            match crate::serde_json::from_slice::<Box<BrowserConnection>>(&b) {
154                                Ok(connection)
155                                    if !connection.web_socket_debugger_url.is_empty() =>
156                                {
157                                    debug_ws_url = connection.web_socket_debugger_url;
158                                    discovered = true;
159                                    break;
160                                }
161                                _ => {
162                                    // JSON parse failed or webSocketDebuggerUrl was empty — retry
163                                    retry().await;
164                                }
165                            }
166                        }
167                        Err(_) => {
168                            retry().await;
169                        }
170                    },
171                    Err(_) => {
172                        retry().await;
173                    }
174                }
175            }
176
177            if !discovered {
178                return Err(CdpError::NoResponse);
179            }
180        }
181
182        let conn =
183            Connection::<CdpEventMessage>::connect_with_retries(&debug_ws_url, retries).await?;
184
185        let (tx, rx) = channel(config.channel_capacity);
186
187        let handler_config = BrowserConfig {
188            ignore_https_errors: config.ignore_https_errors,
189            viewport: config.viewport.clone(),
190            request_timeout: config.request_timeout,
191            request_intercept: config.request_intercept,
192            cache_enabled: config.cache_enabled,
193            ignore_visuals: config.ignore_visuals,
194            ignore_stylesheets: config.ignore_stylesheets,
195            ignore_javascript: config.ignore_javascript,
196            ignore_analytics: config.ignore_analytics,
197            ignore_prefetch: config.ignore_prefetch,
198            ignore_ads: config.ignore_ads,
199            extra_headers: config.extra_headers.clone(),
200            only_html: config.only_html,
201            service_worker_enabled: config.service_worker_enabled,
202            intercept_manager: config.intercept_manager,
203            max_bytes_allowed: config.max_bytes_allowed,
204            max_redirects: config.max_redirects,
205            max_main_frame_navigations: config.max_main_frame_navigations,
206            whitelist_patterns: config.whitelist_patterns.clone(),
207            blacklist_patterns: config.blacklist_patterns.clone(),
208            ..Default::default()
209        };
210
211        let fut = Handler::new(conn, rx, config);
212        let browser_context = fut.default_browser_context().clone();
213
214        let browser = Self {
215            sender: tx,
216            config: Some(handler_config),
217            child: None,
218            debug_ws_url,
219            browser_context,
220        };
221
222        Ok((browser, fut))
223    }
224
225    /// Launches a new instance of `chromium` in the background and attaches to
226    /// its debug web socket.
227    ///
228    /// This fails when no chromium executable could be detected.
229    ///
230    /// This fails if no web socket url could be detected from the child
231    /// processes stderr for more than the configured `launch_timeout`
232    /// (20 seconds by default).
233    pub async fn launch(mut config: BrowserConfig) -> Result<(Self, Handler)> {
234        // Canonalize paths to reduce issues with sandboxing
235        config.executable = utils::canonicalize_except_snap(config.executable).await?;
236
237        // Launch a new chromium instance
238        let mut child = config.launch()?;
239
240        /// Faillible initialization to run once the child process is created.
241        ///
242        /// All faillible calls must be executed inside this function. This ensures that all
243        /// errors are caught and that the child process is properly cleaned-up.
244        async fn with_child(
245            config: &BrowserConfig,
246            child: &mut Child,
247        ) -> Result<(String, Connection<CdpEventMessage>)> {
248            let dur = config.launch_timeout;
249            let timeout_fut = Box::pin(tokio::time::sleep(dur));
250
251            // extract the ws:
252            let debug_ws_url = ws_url_from_output(child, timeout_fut).await?;
253            let conn = Connection::<CdpEventMessage>::connect_with_retries(
254                &debug_ws_url,
255                config.connection_retries,
256            )
257            .await?;
258            Ok((debug_ws_url, conn))
259        }
260
261        let (debug_ws_url, conn) = match with_child(&config, &mut child).await {
262            Ok(conn) => conn,
263            Err(e) => {
264                // An initialization error occurred, clean up the process
265                if let Ok(Some(_)) = child.try_wait() {
266                    // already exited, do nothing, may happen if the browser crashed
267                } else {
268                    // the process is still alive, kill it and wait for exit (avoid zombie processes)
269                    let _ = child.kill().await;
270                    let _ = child.wait().await;
271                }
272                return Err(e);
273            }
274        };
275
276        // Only infaillible calls are allowed after this point to avoid clean-up issues with the
277        // child process.
278
279        let (tx, rx) = channel(config.channel_capacity);
280
281        let handler_config = HandlerConfig {
282            ignore_https_errors: config.ignore_https_errors,
283            viewport: config.viewport.clone(),
284            context_ids: Vec::new(),
285            request_timeout: config.request_timeout,
286            request_intercept: config.request_intercept,
287            cache_enabled: config.cache_enabled,
288            ignore_visuals: config.ignore_visuals,
289            ignore_stylesheets: config.ignore_stylesheets,
290            ignore_javascript: config.ignore_javascript,
291            ignore_analytics: config.ignore_analytics,
292            ignore_prefetch: config.ignore_prefetch,
293            ignore_ads: config.ignore_ads,
294            extra_headers: config.extra_headers.clone(),
295            only_html: config.only_html,
296            service_worker_enabled: config.service_worker_enabled,
297            created_first_target: false,
298            intercept_manager: config.intercept_manager,
299            max_bytes_allowed: config.max_bytes_allowed,
300            max_redirects: config.max_redirects,
301            max_main_frame_navigations: config.max_main_frame_navigations,
302            whitelist_patterns: config.whitelist_patterns.clone(),
303            blacklist_patterns: config.blacklist_patterns.clone(),
304            #[cfg(feature = "adblock")]
305            adblock_filter_rules: config.adblock_filter_rules.clone(),
306            channel_capacity: config.channel_capacity,
307            connection_retries: config.connection_retries,
308        };
309
310        let fut = Handler::new(conn, rx, handler_config);
311        let browser_context = fut.default_browser_context().clone();
312
313        let browser = Self {
314            sender: tx,
315            config: Some(config),
316            child: Some(child),
317            debug_ws_url,
318            browser_context,
319        };
320
321        Ok((browser, fut))
322    }
323
324    /// Request to fetch all existing browser targets.
325    ///
326    /// By default, only targets launched after the browser connection are tracked
327    /// when connecting to a existing browser instance with the devtools websocket url
328    /// This function fetches existing targets on the browser and adds them as pages internally
329    ///
330    /// The pages are not guaranteed to be ready as soon as the function returns
331    /// You should wait a few millis if you need to use a page
332    /// Returns [TargetInfo]
333    pub async fn fetch_targets(&mut self) -> Result<Vec<TargetInfo>> {
334        let (tx, rx) = oneshot_channel();
335
336        self.sender.send(HandlerMessage::FetchTargets(tx)).await?;
337
338        rx.await?
339    }
340
341    /// Request for the browser to close completely.
342    ///
343    /// If the browser was spawned by [`Browser::launch`], it is recommended to wait for the
344    /// spawned instance exit, to avoid "zombie" processes ([`Browser::wait`],
345    /// [`Browser::wait_sync`], [`Browser::try_wait`]).
346    /// [`Browser::drop`] waits automatically if needed.
347    pub async fn close(&self) -> Result<CloseReturns> {
348        let (tx, rx) = oneshot_channel();
349
350        self.sender.send(HandlerMessage::CloseBrowser(tx)).await?;
351
352        rx.await?
353    }
354
355    /// Asynchronously wait for the spawned chromium instance to exit completely.
356    ///
357    /// The instance is spawned by [`Browser::launch`]. `wait` is usually called after
358    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
359    /// "zombie" processes.
360    ///
361    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
362    /// connected to an existing browser through [`Browser::connect`])
363    pub async fn wait(&mut self) -> io::Result<Option<ExitStatus>> {
364        if let Some(child) = self.child.as_mut() {
365            Ok(Some(child.wait().await?))
366        } else {
367            Ok(None)
368        }
369    }
370
371    /// If the spawned chromium instance has completely exited, wait for it.
372    ///
373    /// The instance is spawned by [`Browser::launch`]. `try_wait` is usually called after
374    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
375    /// "zombie" processes.
376    ///
377    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
378    /// connected to an existing browser through [`Browser::connect`])
379    pub fn try_wait(&mut self) -> io::Result<Option<ExitStatus>> {
380        if let Some(child) = self.child.as_mut() {
381            child.try_wait()
382        } else {
383            Ok(None)
384        }
385    }
386
    /// Get mutable access to the spawned chromium instance.
    ///
    /// The instance is spawned by [`Browser::launch`]. The result is a [`async_process::Child`]
    /// value. It acts as a compat wrapper for an `async-std` or `tokio` child process.
    ///
    /// You may use [`async_process::Child::as_mut_inner`] to retrieve the concrete implementation
    /// for the selected runtime.
    ///
    /// Returns `None` if this [`Browser`] did not spawn any chromium instance (e.g.
    /// connected to an existing browser through [`Browser::connect`])
    pub fn get_mut_child(&mut self) -> Option<&mut Child> {
        self.child.as_mut()
    }
400
    /// Whether this [`Browser`] holds a chromium process it spawned itself
    /// (i.e. it was created by [`Browser::launch`] rather than [`Browser::connect`]).
    pub fn has_child(&self) -> bool {
        self.child.is_some()
    }
405
406    /// Forcibly kill the spawned chromium instance
407    ///
408    /// The instance is spawned by [`Browser::launch`]. `kill` will automatically wait for the child
409    /// process to exit to avoid "zombie" processes.
410    ///
411    /// This method is provided to help if the browser does not close by itself. You should prefer
412    /// to use [`Browser::close`].
413    ///
414    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
415    /// connected to an existing browser through [`Browser::connect`])
416    pub async fn kill(&mut self) -> Option<io::Result<()>> {
417        match self.child.as_mut() {
418            Some(child) => Some(child.kill().await),
419            None => None,
420        }
421    }
422
423    /// If not launched as incognito this creates a new incognito browser
424    /// context. After that this browser exists within the incognito session.
425    /// New pages created while being in incognito mode will also run in the
426    /// incognito context. Incognito contexts won't share cookies/cache with
427    /// other browser contexts.
428    pub async fn start_incognito_context(&mut self) -> Result<&mut Self> {
429        if !self.is_incognito_configured() {
430            let browser_context_id = self
431                .create_browser_context(CreateBrowserContextParams::default())
432                .await?;
433            self.browser_context = BrowserContext::from(browser_context_id);
434            self.sender
435                .send(HandlerMessage::InsertContext(self.browser_context.clone()))
436                .await?;
437        }
438
439        Ok(self)
440    }
441
442    /// If a incognito session was created with
443    /// `Browser::start_incognito_context` this disposes this context.
444    ///
445    /// # Note This will also dispose all pages that were running within the
446    /// incognito context.
447    pub async fn quit_incognito_context_base(
448        &self,
449        browser_context_id: BrowserContextId,
450    ) -> Result<&Self> {
451        self.dispose_browser_context(browser_context_id.clone())
452            .await?;
453        self.sender
454            .send(HandlerMessage::DisposeContext(BrowserContext::from(
455                browser_context_id,
456            )))
457            .await?;
458        Ok(self)
459    }
460
461    /// If a incognito session was created with
462    /// `Browser::start_incognito_context` this disposes this context.
463    ///
464    /// # Note This will also dispose all pages that were running within the
465    /// incognito context.
466    pub async fn quit_incognito_context(&mut self) -> Result<&mut Self> {
467        if let Some(id) = self.browser_context.take() {
468            let _ = self.quit_incognito_context_base(id).await;
469        }
470        Ok(self)
471    }
472
473    /// Whether incognito mode was configured from the start
474    fn is_incognito_configured(&self) -> bool {
475        self.config
476            .as_ref()
477            .map(|c| c.incognito)
478            .unwrap_or_default()
479    }
480
    /// Returns the address of the websocket this browser is attached to.
    ///
    /// When connecting through a http(s) url, this is the `webSocketDebuggerUrl`
    /// discovered from the `json/version` endpoint, not the url that was passed in.
    pub fn websocket_address(&self) -> &String {
        &self.debug_ws_url
    }
485
    /// Whether the browser runs incognito: either incognito was configured from
    /// the start, or the current `BrowserContext` reports itself as incognito.
    pub fn is_incognito(&self) -> bool {
        self.is_incognito_configured() || self.browser_context.is_incognito()
    }
490
    /// The configuration of this browser, if any.
    ///
    /// For a browser created by [`Browser::launch`] this is the launch configuration.
    /// For a browser created by [`Browser::connect_with_config`] it is a `BrowserConfig`
    /// populated from the supplied `HandlerConfig`, with defaults for everything else.
    pub fn config(&self) -> Option<&BrowserConfig> {
        self.config.as_ref()
    }
495
496    /// Create a new browser page
497    pub async fn new_page(&self, params: impl Into<CreateTargetParams>) -> Result<Page> {
498        let (tx, rx) = oneshot_channel();
499        let mut params = params.into();
500
501        if let Some(id) = self.browser_context.id() {
502            if params.browser_context_id.is_none() {
503                params.browser_context_id = Some(id.clone());
504            }
505        }
506
507        let _ = self
508            .sender
509            .send(HandlerMessage::CreatePage(params, tx))
510            .await;
511
512        rx.await?
513    }
514
515    /// Version information about the browser
516    pub async fn version(&self) -> Result<GetVersionReturns> {
517        Ok(self.execute(GetVersionParams::default()).await?.result)
518    }
519
520    /// Returns the user agent of the browser
521    pub async fn user_agent(&self) -> Result<String> {
522        Ok(self.version().await?.user_agent)
523    }
524
525    /// Call a browser method.
526    pub async fn execute<T: Command>(&self, cmd: T) -> Result<CommandResponse<T::Response>> {
527        let (tx, rx) = oneshot_channel();
528        let method = cmd.identifier();
529        let msg = CommandMessage::new(cmd, tx)?;
530
531        self.sender.send(HandlerMessage::Command(msg)).await?;
532        let resp = rx.await??;
533        to_command_response::<T>(resp, method)
534    }
535
536    /// Set permission settings for given embedding and embedded origins.
537    /// [PermissionDescriptor](https://chromedevtools.github.io/devtools-protocol/tot/Browser/#type-PermissionDescriptor)
538    /// [PermissionSetting](https://chromedevtools.github.io/devtools-protocol/tot/Browser/#type-PermissionSetting)
539    pub async fn set_permission(
540        &self,
541        permission: PermissionDescriptor,
542        setting: PermissionSetting,
543        origin: Option<impl Into<String>>,
544        embedded_origin: Option<impl Into<String>>,
545        browser_context_id: Option<BrowserContextId>,
546    ) -> Result<&Self> {
547        self.execute(SetPermissionParams {
548            permission,
549            setting,
550            origin: origin.map(Into::into),
551            embedded_origin: embedded_origin.map(Into::into),
552            browser_context_id: browser_context_id.or_else(|| self.browser_context.id.clone()),
553        })
554        .await?;
555        Ok(self)
556    }
557
    /// Convenience: set a permission for a single origin using the current browser context.
    ///
    /// Forwards to [`Browser::set_permission`] with no explicit browser context id,
    /// so the current browser context (if any) is used.
    pub async fn set_permission_for_origin(
        &self,
        origin: impl Into<String>,
        embedded_origin: Option<impl Into<String>>,
        permission: PermissionDescriptor,
        setting: PermissionSetting,
    ) -> Result<&Self> {
        self.set_permission(permission, setting, Some(origin), embedded_origin, None)
            .await
    }
569
    /// "Reset" a permission override for an origin by setting it back to
    /// [`PermissionSetting::Prompt`].
    pub async fn reset_permission_for_origin(
        &self,
        origin: impl Into<String>,
        embedded_origin: Option<impl Into<String>>,
        permission: PermissionDescriptor,
    ) -> Result<&Self> {
        self.set_permission_for_origin(
            origin,
            embedded_origin,
            permission,
            PermissionSetting::Prompt,
        )
        .await
    }
585
    /// "Grant" the given permission for an origin by setting it to
    /// [`PermissionSetting::Granted`].
    ///
    /// Despite the name, only the single `permission` passed in is affected.
    pub async fn grant_all_permission_for_origin(
        &self,
        origin: impl Into<String>,
        embedded_origin: Option<impl Into<String>>,
        permission: PermissionDescriptor,
    ) -> Result<&Self> {
        self.set_permission_for_origin(
            origin,
            embedded_origin,
            permission,
            PermissionSetting::Granted,
        )
        .await
    }
601
    /// "Deny" the given permission for an origin by setting it to
    /// [`PermissionSetting::Denied`].
    ///
    /// Despite the name, only the single `permission` passed in is affected.
    pub async fn deny_all_permission_for_origin(
        &self,
        origin: impl Into<String>,
        embedded_origin: Option<impl Into<String>>,
        permission: PermissionDescriptor,
    ) -> Result<&Self> {
        self.set_permission_for_origin(
            origin,
            embedded_origin,
            permission,
            PermissionSetting::Denied,
        )
        .await
    }
617
618    /// Return all of the pages of the browser
619    pub async fn pages(&self) -> Result<Vec<Page>> {
620        let (tx, rx) = oneshot_channel();
621        self.sender.send(HandlerMessage::GetPages(tx)).await?;
622        Ok(rx.await?)
623    }
624
625    /// Return page of given target_id
626    pub async fn get_page(&self, target_id: TargetId) -> Result<Page> {
627        let (tx, rx) = oneshot_channel();
628        self.sender
629            .send(HandlerMessage::GetPage(target_id, tx))
630            .await?;
631        rx.await?.ok_or(CdpError::NotFound)
632    }
633
634    /// Set listener for browser event
635    pub async fn event_listener<T: IntoEventKind>(&self) -> Result<EventStream<T>> {
636        let (tx, rx) = unbounded_channel();
637        self.sender
638            .send(HandlerMessage::AddEventListener(
639                EventListenerRequest::new::<T>(tx),
640            ))
641            .await?;
642
643        Ok(EventStream::new(rx))
644    }
645
646    /// Creates a new empty browser context.
647    pub async fn create_browser_context(
648        &mut self,
649        params: CreateBrowserContextParams,
650    ) -> Result<BrowserContextId> {
651        let response = self.execute(params).await?;
652
653        Ok(response.result.browser_context_id)
654    }
655
656    /// Returns all browser contexts created with Target.createBrowserContext method.
657    pub async fn get_browser_contexts(
658        &mut self,
659        params: GetBrowserContextsParams,
660    ) -> Result<GetBrowserContextsReturns> {
661        let response = self.execute(params).await?;
662        Ok(response.result)
663    }
664
665    /// Send a new empty browser context.
666    pub async fn send_new_context(
667        &mut self,
668        browser_context_id: BrowserContextId,
669    ) -> Result<&Self> {
670        self.browser_context = BrowserContext::from(browser_context_id);
671        self.sender
672            .send(HandlerMessage::InsertContext(self.browser_context.clone()))
673            .await?;
674        Ok(self)
675    }
676
677    /// Deletes a browser context.
678    pub async fn dispose_browser_context(
679        &self,
680        browser_context_id: impl Into<BrowserContextId>,
681    ) -> Result<&Self> {
682        self.execute(DisposeBrowserContextParams::new(browser_context_id))
683            .await?;
684
685        Ok(self)
686    }
687
688    /// Clears cookies.
689    pub async fn clear_cookies(&self) -> Result<&Self> {
690        self.execute(ClearCookiesParams::default()).await?;
691        Ok(self)
692    }
693
694    /// Returns all browser cookies.
695    pub async fn get_cookies(&self) -> Result<Vec<Cookie>> {
696        let cmd = GetCookiesParams {
697            browser_context_id: self.browser_context.id.clone(),
698        };
699
700        Ok(self.execute(cmd).await?.result.cookies)
701    }
702
703    /// Sets given cookies.
704    pub async fn set_cookies(&self, mut cookies: Vec<CookieParam>) -> Result<&Self> {
705        for cookie in &mut cookies {
706            if let Some(url) = cookie.url.as_ref() {
707                crate::page::validate_cookie_url(url)?;
708            }
709        }
710
711        let mut cookies_param = SetCookiesParams::new(cookies);
712
713        cookies_param.browser_context_id = self.browser_context.id.clone();
714
715        self.execute(cookies_param).await?;
716        Ok(self)
717    }
718}
719
720impl Drop for Browser {
721    fn drop(&mut self) {
722        if let Some(child) = self.child.as_mut() {
723            if let Ok(Some(_)) = child.try_wait() {
724                // Already exited, do nothing. Usually occurs after using the method close or kill.
725            } else {
726                // We set the `kill_on_drop` property for the child process, so no need to explicitely
727                // kill it here. It can't really be done anyway since the method is async.
728                //
729                // On Unix, the process will be reaped in the background by the runtime automatically
730                // so it won't leave any resources locked. It is, however, a better practice for the user to
731                // do it himself since the runtime doesn't provide garantees as to when the reap occurs, so we
732                // warn him here.
733                tracing::warn!("Browser was not closed manually, it will be killed automatically in the background");
734            }
735        }
736    }
737}
738
/// Resolve devtools WebSocket URL from the provided browser process
///
/// The browser's stderr is read line by line until a line contains
/// `listening on ` followed by a `ws…` url that contains `devtools/browser`;
/// that url is returned with surrounding whitespace trimmed.
///
/// If an error occurs, it carries the browser's stderr output read so far.
///
/// The URL resolution fails if:
/// - [`CdpError::LaunchTimeout`]: `timeout_fut` completes, this corresponds to a timeout
/// - [`CdpError::LaunchExit`]: the browser process exits (or is killed)
/// - [`CdpError::LaunchIo`]: an input/output error occurs while awaiting the process exit or
///   reading the browser's stderr: missing stderr handle, end of stream, invalid UTF-8, other
async fn ws_url_from_output(
    child_process: &mut Child,
    timeout_fut: impl Future<Output = ()> + Unpin,
) -> Result<String> {
    use tokio::io::AsyncBufReadExt;
    // Take ownership of the stderr handle; fail early if there is none.
    let stderr = match child_process.stderr.take() {
        Some(stderr) => stderr,
        None => {
            return Err(CdpError::LaunchIo(
                io::Error::new(io::ErrorKind::NotFound, "browser process has no stderr"),
                BrowserStderr::new(Vec::new()),
            ));
        }
    };
    // Accumulates everything read from stderr so it can be attached to errors.
    let mut stderr_bytes = Vec::<u8>::new();
    let mut buf = tokio::io::BufReader::new(stderr);
    let mut timeout_fut = timeout_fut;
    loop {
        // Whichever happens first: the timeout, the process exiting, or a new stderr line.
        tokio::select! {
            _ = &mut timeout_fut => return Err(CdpError::LaunchTimeout(BrowserStderr::new(stderr_bytes))),
            exit_status = child_process.wait() => {
                // The process ending before a url was seen is always an error.
                return Err(match exit_status {
                    Err(e) => CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)),
                    Ok(exit_status) => CdpError::LaunchExit(exit_status, BrowserStderr::new(stderr_bytes)),
                })
            },
            read_res = buf.read_until(b'\n', &mut stderr_bytes) => {
                match read_res {
                    Err(e) => return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes))),
                    Ok(byte_count) => {
                        // Zero bytes read means stderr was closed.
                        if byte_count == 0 {
                            let e = io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected end of stream");
                            return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
                        }
                        // Only inspect the bytes appended by this read, i.e. the latest line.
                        let start_offset = stderr_bytes.len() - byte_count;
                        let new_bytes = &stderr_bytes[start_offset..];
                        match std::str::from_utf8(new_bytes) {
                            Err(_) => {
                                let e = io::Error::new(io::ErrorKind::InvalidData, "stream did not contain valid UTF-8");
                                return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
                            }
                            Ok(line) => {
                                // Text after the last "listening on " is the url candidate;
                                // non-matching lines are skipped and reading continues.
                                if let Some((_, ws)) = line.rsplit_once("listening on ") {
                                    if ws.starts_with("ws") && ws.contains("devtools/browser") {
                                        return Ok(ws.trim().to_string());
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
803
/// Selects whether the browser is started with a visible window or in one of
/// the headless modes.
///
/// The default variant is [`HeadlessMode::True`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum HeadlessMode {
    /// Not headless: the "headful" mode.
    False,
    /// The old headless mode.
    #[default]
    True,
    /// The new headless mode.
    ///
    /// See also: <https://developer.chrome.com/docs/chromium/new-headless>
    New,
}
814
815#[derive(Debug, Clone, Default)]
816pub struct BrowserConfig {
817    /// Determines whether to run headless version of the browser. Defaults to
818    /// true.
819    headless: HeadlessMode,
820    /// Determines whether to run the browser with a sandbox.
821    sandbox: bool,
822    /// Launch the browser with a specific window width and height.
823    window_size: Option<(u32, u32)>,
824    /// Launch the browser with a specific debugging port.
825    port: u16,
826    /// Path for Chrome or Chromium.
827    ///
828    /// If unspecified, the create will try to automatically detect a suitable
829    /// binary.
830    executable: std::path::PathBuf,
831
832    /// A list of Chrome extensions to load.
833    ///
834    /// An extension should be a path to a folder containing the extension code.
835    /// CRX files cannot be used directly and must be first extracted.
836    ///
837    /// Note that Chrome does not support loading extensions in headless-mode.
838    /// See https://bugs.chromium.org/p/chromium/issues/detail?id=706008#c5
839    extensions: Vec<String>,
840
841    /// Environment variables to set for the Chromium process.
842    /// Passes value through to std::process::Command::envs.
843    pub process_envs: Option<HashMap<String, String>>,
844
845    /// Data dir for user data
846    pub user_data_dir: Option<PathBuf>,
847
848    /// Whether to launch the `Browser` in incognito mode.
849    incognito: bool,
850
851    /// Timeout duration for `Browser::launch`.
852    launch_timeout: Duration,
853
854    /// Ignore https errors, default is true.
855    ignore_https_errors: bool,
856    pub viewport: Option<Viewport>,
857    /// The duration after a request with no response should time out.
858    request_timeout: Duration,
859
860    /// Additional command line arguments to pass to the browser instance.
861    args: Vec<String>,
862
863    /// Whether to disable DEFAULT_ARGS or not, default is false.
864    disable_default_args: bool,
865
866    /// Whether to enable request interception.
867    pub request_intercept: bool,
868
869    /// Whether to enable cache.
870    pub cache_enabled: bool,
871    /// Whether to enable or disable Service Workers.
872    /// Disabling may reduce background network activity and caching effects.
873    pub service_worker_enabled: bool,
874    /// Whether to ignore image/visual requests during interception.
875    /// Can reduce bandwidth and speed up crawling when visuals are unnecessary.
876    pub ignore_visuals: bool,
877    /// Whether to ignore stylesheet (CSS) requests during interception.
878    /// Useful for content-only crawls.
879    pub ignore_stylesheets: bool,
880    /// Whether to ignore JavaScript requests during interception.
881    /// This still allows critical framework bundles to pass when applicable.
882    pub ignore_javascript: bool,
883    /// Whether to ignore analytics/telemetry requests during interception.
884    pub ignore_analytics: bool,
885    /// Ignore prefetch request.
886    pub ignore_prefetch: bool,
887    /// Whether to ignore ad network requests during interception.
888    pub ignore_ads: bool,
889    /// Extra headers.
890    pub extra_headers: Option<std::collections::HashMap<String, String>>,
891    /// Only html
892    pub only_html: bool,
893    /// The interception intercept manager.
894    pub intercept_manager: NetworkInterceptManager,
895    /// The max bytes to receive.
896    pub max_bytes_allowed: Option<u64>,
897    /// Cap on Document-type redirect hops before the navigation is aborted.
898    /// `None` disables enforcement; `Some(n)` mirrors `reqwest::redirect::Policy::limited(n)`.
899    pub max_redirects: Option<usize>,
900    /// Cap on main-frame cross-document navigations per `goto`. Defends against
901    /// JS / meta-refresh loops that bypass the HTTP redirect guard. `None`
902    /// disables the guard.
903    pub max_main_frame_navigations: Option<u32>,
904    /// Whitelist patterns to allow through the network.
905    pub whitelist_patterns: Option<Vec<String>>,
906    /// Blacklist patterns to block through the network.
907    pub blacklist_patterns: Option<Vec<String>>,
908    /// Extra ABP/uBO filter rules to load into the adblock engine (requires `adblock` feature).
909    /// These are merged with the built-in `ADBLOCK_PATTERNS` for richer blocking
910    /// (e.g. EasyList / EasyPrivacy content).
911    #[cfg(feature = "adblock")]
912    pub adblock_filter_rules: Option<Vec<String>>,
913    /// Capacity of the channel between browser handle and handler.
914    /// Defaults to 1000.
915    pub channel_capacity: usize,
916    /// Number of WebSocket connection retry attempts with exponential backoff.
917    /// Defaults to 4.
918    pub connection_retries: u32,
919}
920
921#[derive(Debug, Clone)]
922pub struct BrowserConfigBuilder {
923    /// Headless mode configuration for the browser.
924    headless: HeadlessMode,
925    /// Whether to run the browser with a sandbox.
926    sandbox: bool,
927    /// Optional initial browser window size `(width, height)`.
928    window_size: Option<(u32, u32)>,
929    /// DevTools debugging port to bind to.
930    port: u16,
931    /// Optional explicit path to the Chrome/Chromium executable.
932    /// If `None`, auto-detection may be attempted based on `executation_detection`.
933    executable: Option<PathBuf>,
934    /// Controls auto-detection behavior for finding a Chrome/Chromium binary.
935    executation_detection: DetectionOptions,
936    /// List of unpacked extensions (directories) to load at startup.
937    extensions: Vec<String>,
938    /// Environment variables to set on the spawned Chromium process.
939    process_envs: Option<HashMap<String, String>>,
940    /// User data directory to persist browser state, or `None` for ephemeral.
941    user_data_dir: Option<PathBuf>,
942    /// Whether to start the browser in incognito (off-the-record) mode.
943    incognito: bool,
944    /// Maximum time to wait for the browser to launch and become ready.
945    launch_timeout: Duration,
946    /// Whether to ignore HTTPS/TLS errors during navigation and requests.
947    ignore_https_errors: bool,
948    /// Default page viewport configuration applied on startup.
949    viewport: Option<Viewport>,
950    /// Timeout for individual network requests without response progress.
951    request_timeout: Duration,
952    /// Additional command-line flags passed directly to the browser process.
953    args: Vec<String>,
954    /// Disable the default argument set and use only the provided `args`.
955    disable_default_args: bool,
956    /// Enable Network.requestInterception for request filtering/handling.
957    request_intercept: bool,
958    /// Enable the browser cache for navigations and subresources.
959    cache_enabled: bool,
960    /// Enable/disable Service Workers.
961    service_worker_enabled: bool,
962    /// Drop image/visual requests when interception is enabled.
963    ignore_visuals: bool,
964    /// Drop ad network requests when interception is enabled.
965    ignore_ads: bool,
966    /// Drop JavaScript requests when interception is enabled.
967    ignore_javascript: bool,
968    /// Drop stylesheet (CSS) requests when interception is enabled.
969    ignore_stylesheets: bool,
970    /// Ignore prefetch domains.
971    ignore_prefetch: bool,
972    /// Drop analytics/telemetry requests when interception is enabled.
973    ignore_analytics: bool,
974    /// If `true`, limit fetching to HTML documents.
975    only_html: bool,
976    /// Extra HTTP headers to include with every request.
977    extra_headers: Option<std::collections::HashMap<String, String>>,
978    /// Network interception manager used to configure filtering behavior.
979    intercept_manager: NetworkInterceptManager,
980    /// Optional upper bound on bytes that may be received (per session/run).
981    max_bytes_allowed: Option<u64>,
982    /// Optional cap on Document redirect hops per navigation (`None` = disabled).
983    max_redirects: Option<usize>,
984    /// Optional cap on main-frame cross-document navigations per goto.
985    max_main_frame_navigations: Option<u32>,
986    /// Whitelist patterns to allow through the network.
987    whitelist_patterns: Option<Vec<String>>,
988    /// Blacklist patterns to block through the network.
989    blacklist_patterns: Option<Vec<String>>,
990    /// Extra ABP/uBO filter rules for the adblock engine.
991    #[cfg(feature = "adblock")]
992    adblock_filter_rules: Option<Vec<String>>,
993    /// Capacity of the channel between browser handle and handler.
994    channel_capacity: usize,
995    /// Number of WebSocket connection retry attempts.
996    connection_retries: u32,
997}
998
999impl BrowserConfig {
1000    /// Browser builder default config.
1001    pub fn builder() -> BrowserConfigBuilder {
1002        BrowserConfigBuilder::default()
1003    }
1004
1005    /// Launch with the executable path.
1006    pub fn with_executable(path: impl AsRef<Path>) -> Self {
1007        // SAFETY: build() only fails when no executable is provided,
1008        // but we always provide one via chrome_executable().
1009        Self::builder().chrome_executable(path).build().unwrap()
1010    }
1011}
1012
1013impl Default for BrowserConfigBuilder {
1014    fn default() -> Self {
1015        Self {
1016            headless: HeadlessMode::True,
1017            sandbox: true,
1018            window_size: None,
1019            port: 0,
1020            executable: None,
1021            executation_detection: DetectionOptions::default(),
1022            extensions: Vec::new(),
1023            process_envs: None,
1024            user_data_dir: None,
1025            incognito: false,
1026            launch_timeout: Duration::from_millis(LAUNCH_TIMEOUT),
1027            ignore_https_errors: true,
1028            viewport: Some(Default::default()),
1029            request_timeout: Duration::from_millis(REQUEST_TIMEOUT),
1030            args: Vec::new(),
1031            disable_default_args: false,
1032            request_intercept: false,
1033            cache_enabled: true,
1034            ignore_visuals: false,
1035            ignore_ads: false,
1036            ignore_javascript: false,
1037            ignore_analytics: false,
1038            ignore_stylesheets: false,
1039            ignore_prefetch: true,
1040            only_html: false,
1041            extra_headers: Default::default(),
1042            service_worker_enabled: true,
1043            intercept_manager: NetworkInterceptManager::Unknown,
1044            max_bytes_allowed: None,
1045            max_redirects: None,
1046            max_main_frame_navigations: None,
1047            whitelist_patterns: None,
1048            blacklist_patterns: None,
1049            #[cfg(feature = "adblock")]
1050            adblock_filter_rules: None,
1051            channel_capacity: 4096,
1052            connection_retries: crate::conn::DEFAULT_CONNECTION_RETRIES,
1053        }
1054    }
1055}
1056
1057impl BrowserConfigBuilder {
1058    /// Configure window size.
1059    pub fn window_size(mut self, width: u32, height: u32) -> Self {
1060        self.window_size = Some((width, height));
1061        self
1062    }
1063    /// Configure sandboxing.
1064    pub fn no_sandbox(mut self) -> Self {
1065        self.sandbox = false;
1066        self
1067    }
1068    /// Configure the launch to start non headless.
1069    pub fn with_head(mut self) -> Self {
1070        self.headless = HeadlessMode::False;
1071        self
1072    }
1073    /// Configure the launch with the new headless mode.
1074    pub fn new_headless_mode(mut self) -> Self {
1075        self.headless = HeadlessMode::New;
1076        self
1077    }
1078    /// Configure the launch with headless.
1079    pub fn headless_mode(mut self, mode: HeadlessMode) -> Self {
1080        self.headless = mode;
1081        self
1082    }
1083    /// Configure the launch in incognito.
1084    pub fn incognito(mut self) -> Self {
1085        self.incognito = true;
1086        self
1087    }
1088
1089    pub fn respect_https_errors(mut self) -> Self {
1090        self.ignore_https_errors = false;
1091        self
1092    }
1093
1094    pub fn port(mut self, port: u16) -> Self {
1095        self.port = port;
1096        self
1097    }
1098
1099    pub fn with_max_bytes_allowed(mut self, max_bytes_allowed: Option<u64>) -> Self {
1100        self.max_bytes_allowed = max_bytes_allowed;
1101        self
1102    }
1103
1104    /// Cap the number of Document-type redirect hops per navigation.
1105    ///
1106    /// `None` disables enforcement (default, preserves Chromium's own ~20-hop cap).
1107    /// `Some(n)` aborts once a navigation chain exceeds `n` by emitting
1108    /// `net::ERR_TOO_MANY_REDIRECTS` and calling `Page.stopLoading`.
1109    pub fn with_max_redirects(mut self, max_redirects: Option<usize>) -> Self {
1110        self.max_redirects = max_redirects;
1111        self
1112    }
1113
1114    /// Cap the number of main-frame cross-document navigations allowed per
1115    /// `goto` call.
1116    ///
1117    /// Defends against JS `location.href` / meta-refresh loops that bypass
1118    /// HTTP-level redirect detection — each hop looks like a fresh document
1119    /// to Chromium, so `with_max_redirects` alone cannot catch them. `None`
1120    /// disables the guard (default).
1121    pub fn with_max_main_frame_navigations(mut self, cap: Option<u32>) -> Self {
1122        self.max_main_frame_navigations = cap;
1123        self
1124    }
1125
1126    pub fn launch_timeout(mut self, timeout: Duration) -> Self {
1127        self.launch_timeout = timeout;
1128        self
1129    }
1130
1131    pub fn request_timeout(mut self, timeout: Duration) -> Self {
1132        self.request_timeout = timeout;
1133        self
1134    }
1135
1136    /// Configures the viewport of the browser, which defaults to `800x600`.
1137    /// `None` disables viewport emulation (i.e., it uses the browsers default
1138    /// configuration, which fills the available space. This is similar to what
1139    /// Playwright does when you provide `null` as the value of its `viewport`
1140    /// option).
1141    pub fn viewport(mut self, viewport: impl Into<Option<Viewport>>) -> Self {
1142        self.viewport = viewport.into();
1143        self
1144    }
1145
1146    pub fn user_data_dir(mut self, data_dir: impl AsRef<Path>) -> Self {
1147        self.user_data_dir = Some(data_dir.as_ref().to_path_buf());
1148        self
1149    }
1150
1151    pub fn chrome_executable(mut self, path: impl AsRef<Path>) -> Self {
1152        self.executable = Some(path.as_ref().to_path_buf());
1153        self
1154    }
1155
1156    pub fn chrome_detection(mut self, options: DetectionOptions) -> Self {
1157        self.executation_detection = options;
1158        self
1159    }
1160
1161    pub fn extension(mut self, extension: impl Into<String>) -> Self {
1162        self.extensions.push(extension.into());
1163        self
1164    }
1165
1166    pub fn extensions<I, S>(mut self, extensions: I) -> Self
1167    where
1168        I: IntoIterator<Item = S>,
1169        S: Into<String>,
1170    {
1171        for ext in extensions {
1172            self.extensions.push(ext.into());
1173        }
1174        self
1175    }
1176
1177    pub fn env(mut self, key: impl Into<String>, val: impl Into<String>) -> Self {
1178        self.process_envs
1179            .get_or_insert(HashMap::new())
1180            .insert(key.into(), val.into());
1181        self
1182    }
1183
1184    pub fn envs<I, K, V>(mut self, envs: I) -> Self
1185    where
1186        I: IntoIterator<Item = (K, V)>,
1187        K: Into<String>,
1188        V: Into<String>,
1189    {
1190        self.process_envs
1191            .get_or_insert(HashMap::new())
1192            .extend(envs.into_iter().map(|(k, v)| (k.into(), v.into())));
1193        self
1194    }
1195
1196    pub fn arg(mut self, arg: impl Into<String>) -> Self {
1197        self.args.push(arg.into());
1198        self
1199    }
1200
1201    pub fn args<I, S>(mut self, args: I) -> Self
1202    where
1203        I: IntoIterator<Item = S>,
1204        S: Into<String>,
1205    {
1206        for arg in args {
1207            self.args.push(arg.into());
1208        }
1209        self
1210    }
1211
1212    pub fn disable_default_args(mut self) -> Self {
1213        self.disable_default_args = true;
1214        self
1215    }
1216
1217    pub fn enable_request_intercept(mut self) -> Self {
1218        self.request_intercept = true;
1219        self
1220    }
1221
1222    pub fn disable_request_intercept(mut self) -> Self {
1223        self.request_intercept = false;
1224        self
1225    }
1226
1227    pub fn enable_cache(mut self) -> Self {
1228        self.cache_enabled = true;
1229        self
1230    }
1231
1232    pub fn disable_cache(mut self) -> Self {
1233        self.cache_enabled = false;
1234        self
1235    }
1236
1237    /// Set service worker enabled.
1238    pub fn set_service_worker_enabled(mut self, bypass: bool) -> Self {
1239        self.service_worker_enabled = bypass;
1240        self
1241    }
1242
1243    /// Set extra request headers.
1244    pub fn set_extra_headers(
1245        mut self,
1246        headers: Option<std::collections::HashMap<String, String>>,
1247    ) -> Self {
1248        self.extra_headers = headers;
1249        self
1250    }
1251
1252    /// Set whitelist patterns to allow through network interception allowing.
1253    pub fn set_whitelist_patterns(mut self, whitelist_patterns: Option<Vec<String>>) -> Self {
1254        self.whitelist_patterns = whitelist_patterns;
1255        self
1256    }
1257
1258    /// Set blacklist patterns to block through network interception.
1259    pub fn set_blacklist_patterns(mut self, blacklist_patterns: Option<Vec<String>>) -> Self {
1260        self.blacklist_patterns = blacklist_patterns;
1261        self
1262    }
1263
1264    /// Set extra ABP/uBO filter rules for the adblock engine.
1265    /// Pass EasyList/EasyPrivacy content lines for richer blocking coverage.
1266    #[cfg(feature = "adblock")]
1267    pub fn set_adblock_filter_rules(mut self, rules: Vec<String>) -> Self {
1268        self.adblock_filter_rules = Some(rules);
1269        self
1270    }
1271
1272    /// Set the capacity of the channel between browser handle and handler.
1273    /// Defaults to 1000.
1274    pub fn channel_capacity(mut self, capacity: usize) -> Self {
1275        self.channel_capacity = capacity;
1276        self
1277    }
1278
1279    /// Set the number of WebSocket connection retry attempts with exponential backoff.
1280    /// Defaults to 4. Set to 0 for a single attempt with no retries.
1281    pub fn connection_retries(mut self, retries: u32) -> Self {
1282        self.connection_retries = retries;
1283        self
1284    }
1285
1286    /// Build the browser.
1287    pub fn build(self) -> std::result::Result<BrowserConfig, String> {
1288        let executable = if let Some(e) = self.executable {
1289            e
1290        } else {
1291            detection::default_executable(self.executation_detection)?
1292        };
1293
1294        Ok(BrowserConfig {
1295            headless: self.headless,
1296            sandbox: self.sandbox,
1297            window_size: self.window_size,
1298            port: self.port,
1299            executable,
1300            extensions: self.extensions,
1301            process_envs: self.process_envs,
1302            user_data_dir: self.user_data_dir,
1303            incognito: self.incognito,
1304            launch_timeout: self.launch_timeout,
1305            ignore_https_errors: self.ignore_https_errors,
1306            viewport: self.viewport,
1307            request_timeout: self.request_timeout,
1308            args: self.args,
1309            disable_default_args: self.disable_default_args,
1310            request_intercept: self.request_intercept,
1311            cache_enabled: self.cache_enabled,
1312            ignore_visuals: self.ignore_visuals,
1313            ignore_ads: self.ignore_ads,
1314            ignore_javascript: self.ignore_javascript,
1315            ignore_analytics: self.ignore_analytics,
1316            ignore_stylesheets: self.ignore_stylesheets,
1317            ignore_prefetch: self.ignore_prefetch,
1318            extra_headers: self.extra_headers,
1319            only_html: self.only_html,
1320            intercept_manager: self.intercept_manager,
1321            service_worker_enabled: self.service_worker_enabled,
1322            max_bytes_allowed: self.max_bytes_allowed,
1323            max_redirects: self.max_redirects,
1324            max_main_frame_navigations: self.max_main_frame_navigations,
1325            whitelist_patterns: self.whitelist_patterns,
1326            blacklist_patterns: self.blacklist_patterns,
1327            #[cfg(feature = "adblock")]
1328            adblock_filter_rules: self.adblock_filter_rules,
1329            channel_capacity: self.channel_capacity,
1330            connection_retries: self.connection_retries,
1331        })
1332    }
1333}
1334
1335impl BrowserConfig {
1336    pub fn launch(&self) -> io::Result<Child> {
1337        let mut cmd = async_process::Command::new(&self.executable);
1338
1339        if self.disable_default_args {
1340            cmd.args(&self.args);
1341        } else {
1342            cmd.args(DEFAULT_ARGS).args(&self.args);
1343        }
1344
1345        if !self
1346            .args
1347            .iter()
1348            .any(|arg| arg.contains("--remote-debugging-port="))
1349        {
1350            cmd.arg(format!("--remote-debugging-port={}", self.port));
1351        }
1352
1353        cmd.args(
1354            self.extensions
1355                .iter()
1356                .map(|e| format!("--load-extension={e}")),
1357        );
1358
1359        if let Some(ref user_data) = self.user_data_dir {
1360            cmd.arg(format!("--user-data-dir={}", user_data.display()));
1361        } else {
1362            // If the user did not specify a data directory, this would default to the systems default
1363            // data directory. In most cases, we would rather have a fresh instance of Chromium. Specify
1364            // a temp dir just for chromiumoxide instead.
1365            cmd.arg(format!(
1366                "--user-data-dir={}",
1367                std::env::temp_dir().join("chromiumoxide-runner").display()
1368            ));
1369        }
1370
1371        if let Some((width, height)) = self.window_size {
1372            cmd.arg(format!("--window-size={width},{height}"));
1373        }
1374
1375        if !self.sandbox {
1376            cmd.args(["--no-sandbox", "--disable-setuid-sandbox"]);
1377        }
1378
1379        match self.headless {
1380            HeadlessMode::False => (),
1381            HeadlessMode::True => {
1382                cmd.args(["--headless", "--hide-scrollbars", "--mute-audio"]);
1383            }
1384            HeadlessMode::New => {
1385                cmd.args(["--headless=new", "--hide-scrollbars", "--mute-audio"]);
1386            }
1387        }
1388
1389        if self.incognito {
1390            cmd.arg("--incognito");
1391        }
1392
1393        if let Some(ref envs) = self.process_envs {
1394            cmd.envs(envs);
1395        }
1396        cmd.stderr(Stdio::piped()).spawn()
1397    }
1398}
1399
1400/// Returns the path to Chrome's executable.
1401///
1402/// If the `CHROME` environment variable is set, `default_executable` will
1403/// use it as the default path. Otherwise, the filenames `google-chrome-stable`
1404/// `chromium`, `chromium-browser`, `chrome` and `chrome-browser` are
1405/// searched for in standard places. If that fails,
1406/// `/Applications/Google Chrome.app/...` (on MacOS) or the registry (on
1407/// Windows) is consulted. If all of the above fail, an error is returned.
1408#[deprecated(note = "Use detection::default_executable instead")]
1409pub fn default_executable() -> Result<std::path::PathBuf, String> {
1410    let options = DetectionOptions {
1411        msedge: false,
1412        unstable: false,
1413    };
1414    detection::default_executable(options)
1415}
1416
/// Command line switches handed to the Chrome binary by default.
///
/// They are placed before any user supplied arguments unless the
/// configuration disables the default arguments.
///
/// Via <https://github.com/puppeteer/puppeteer/blob/4846b8723cf20d3551c0d755df394cc5e0c82a94/src/node/Launcher.ts#L157>
static DEFAULT_ARGS: [&str; 26] = [
    "--disable-background-networking",
    "--enable-features=NetworkService,NetworkServiceInProcess",
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-breakpad",
    "--disable-client-side-phishing-detection",
    "--disable-component-extensions-with-background-pages",
    "--disable-default-apps",
    "--disable-dev-shm-usage",
    "--disable-extensions",
    "--disable-features=TranslateUI",
    "--disable-hang-monitor",
    "--disable-ipc-flooding-protection",
    "--disable-popup-blocking",
    "--disable-prompt-on-repost",
    "--disable-renderer-backgrounding",
    "--disable-sync",
    "--force-color-profile=srgb",
    "--metrics-recording-only",
    "--no-first-run",
    "--enable-automation",
    "--password-store=basic",
    "--use-mock-keychain",
    "--enable-blink-features=IdleDetection",
    "--lang=en_US",
    "--disable-blink-features=AutomationControlled",
];