chromiumoxide/
browser.rs

1use hashbrown::HashMap;
2use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE};
3use std::future::Future;
4use std::time::Duration;
5use std::{
6    io,
7    path::{Path, PathBuf},
8};
9
10use futures::channel::mpsc::{channel, unbounded, Sender};
11use futures::channel::oneshot::channel as oneshot_channel;
12use futures::select;
13use futures::SinkExt;
14
15use crate::async_process::{self, Child, ExitStatus, Stdio};
16use crate::cmd::{to_command_response, CommandMessage};
17use crate::conn::Connection;
18use crate::detection::{self, DetectionOptions};
19use crate::error::{BrowserStderr, CdpError, Result};
20use crate::handler::blockers::intercept_manager::NetworkInterceptManager;
21use crate::handler::browser::BrowserContext;
22use crate::handler::viewport::Viewport;
23use crate::handler::{Handler, HandlerConfig, HandlerMessage, REQUEST_TIMEOUT};
24use crate::listeners::{EventListenerRequest, EventStream};
25use crate::page::Page;
26use crate::utils;
27use chromiumoxide_cdp::cdp::browser_protocol::browser::{
28    BrowserContextId, CloseReturns, GetVersionParams, GetVersionReturns,
29};
30use chromiumoxide_cdp::cdp::browser_protocol::network::{Cookie, CookieParam};
31use chromiumoxide_cdp::cdp::browser_protocol::storage::{
32    ClearCookiesParams, GetCookiesParams, SetCookiesParams,
33};
34use chromiumoxide_cdp::cdp::browser_protocol::target::{
35    CreateBrowserContextParams, CreateTargetParams, DisposeBrowserContextParams, TargetId,
36    TargetInfo,
37};
38use chromiumoxide_cdp::cdp::{CdpEventMessage, IntoEventKind};
39use chromiumoxide_types::*;
40
/// Default `Browser::launch` timeout in milliseconds.
///
/// A default [`BrowserConfigBuilder`] converts this value into its `launch_timeout`.
pub const LAUNCH_TIMEOUT: u64 = 20_000;
43
44lazy_static::lazy_static! {
45    /// The request client to get the web socket url.
46    static ref REQUEST_CLIENT: reqwest::Client = reqwest::Client::builder()
47        .timeout(Duration::from_secs(60))
48        .default_headers({
49            let mut m = HeaderMap::new();
50
51            m.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
52
53            m
54        })
55        .build()
56        .unwrap();
57}
58
/// A [`Browser`] is created when chromiumoxide connects to a Chromium instance.
///
/// It is obtained from [`Browser::connect`], [`Browser::connect_with_config`] or
/// [`Browser::launch`]; each of them also returns the [`Handler`] built for the same
/// connection.
#[derive(Debug)]
pub struct Browser {
    /// The `Sender` to send messages to the connection handler that drives the
    /// websocket
    pub(crate) sender: Sender<HandlerMessage>,
    /// The configuration this browser was created with.
    ///
    /// Both [`Browser::launch`] and [`Browser::connect_with_config`] store a value here.
    config: Option<BrowserConfig>,
    /// The spawned chromium instance; `None` when attached to an already running browser
    child: Option<Child>,
    /// The debug web socket url of the chromium instance
    debug_ws_url: String,
    /// The browser context this browser currently operates in
    pub browser_context: BrowserContext,
}
74
/// Browser connection information.
///
/// Deserialized from the JSON body returned by the `json/version` endpoint of a running
/// browser (see [`Browser::connect_with_config`]). Every field is a plain string, and the
/// derived `Default` yields empty strings.
#[derive(serde::Deserialize, Debug, Default)]
pub struct BrowserConnection {
    #[serde(rename = "Browser")]
    /// The browser name
    pub browser: String,
    #[serde(rename = "Protocol-Version")]
    /// The protocol version reported by the browser
    pub protocol_version: String,
    #[serde(rename = "User-Agent")]
    /// User Agent used by default.
    pub user_agent: String,
    #[serde(rename = "V8-Version")]
    /// The v8 engine version
    pub v8_version: String,
    #[serde(rename = "WebKit-Version")]
    /// Webkit version
    pub webkit_version: String,
    #[serde(rename = "webSocketDebuggerUrl")]
    /// Remote debugging address (the web socket url to connect to)
    pub web_socket_debugger_url: String,
}
97
98impl Browser {
99    /// Connect to an already running chromium instance via the given URL.
100    ///
101    /// If the URL is a http(s) URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
102    pub async fn connect(url: impl Into<String>) -> Result<(Self, Handler)> {
103        Self::connect_with_config(url, HandlerConfig::default()).await
104    }
105
106    // Connect to an already running chromium instance with a given `HandlerConfig`.
107    ///
108    /// If the URL is a http URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
109    pub async fn connect_with_config(
110        url: impl Into<String>,
111        config: HandlerConfig,
112    ) -> Result<(Self, Handler)> {
113        let mut debug_ws_url = url.into();
114
115        if debug_ws_url.starts_with("http") {
116            match REQUEST_CLIENT
117                .get(
118                    if debug_ws_url.ends_with("/json/version")
119                        || debug_ws_url.ends_with("/json/version/")
120                    {
121                        debug_ws_url.to_owned()
122                    } else {
123                        format!(
124                            "{}{}json/version",
125                            &debug_ws_url,
126                            if debug_ws_url.ends_with('/') { "" } else { "/" }
127                        )
128                    },
129                )
130                .send()
131                .await
132            {
133                Ok(req) => {
134                    let connection: BrowserConnection =
135                        crate::serde_json::from_slice(&req.bytes().await.unwrap_or_default())
136                            .unwrap_or_default();
137                    if !connection.web_socket_debugger_url.is_empty() {
138                        debug_ws_url = connection.web_socket_debugger_url;
139                    }
140                }
141                Err(_) => return Err(CdpError::NoResponse),
142            }
143        }
144
145        let conn = Connection::<CdpEventMessage>::connect(&debug_ws_url).await?;
146
147        let (tx, rx) = channel(1000);
148
149        let handler_config = BrowserConfig {
150            ignore_https_errors: config.ignore_https_errors,
151            viewport: config.viewport.clone(),
152            request_timeout: config.request_timeout,
153            request_intercept: config.request_intercept,
154            cache_enabled: config.cache_enabled,
155            ignore_visuals: config.ignore_visuals,
156            ignore_stylesheets: config.ignore_stylesheets,
157            ignore_javascript: config.ignore_javascript,
158            ignore_analytics: config.ignore_analytics,
159            ignore_ads: config.ignore_ads,
160            extra_headers: config.extra_headers.clone(),
161            only_html: config.only_html,
162            service_worker_enabled: config.service_worker_enabled,
163            intercept_manager: config.intercept_manager,
164            ..Default::default()
165        };
166
167        let fut = Handler::new(conn, rx, config);
168        let browser_context = fut.default_browser_context().clone();
169
170        let browser = Self {
171            sender: tx,
172            config: Some(handler_config),
173            child: None,
174            debug_ws_url,
175            browser_context,
176        };
177
178        Ok((browser, fut))
179    }
180
181    /// Launches a new instance of `chromium` in the background and attaches to
182    /// its debug web socket.
183    ///
184    /// This fails when no chromium executable could be detected.
185    ///
186    /// This fails if no web socket url could be detected from the child
187    /// processes stderr for more than the configured `launch_timeout`
188    /// (20 seconds by default).
189    pub async fn launch(mut config: BrowserConfig) -> Result<(Self, Handler)> {
190        // Canonalize paths to reduce issues with sandboxing
191        config.executable = utils::canonicalize_except_snap(config.executable).await?;
192
193        // Launch a new chromium instance
194        let mut child = config.launch()?;
195
196        /// Faillible initialization to run once the child process is created.
197        ///
198        /// All faillible calls must be executed inside this function. This ensures that all
199        /// errors are caught and that the child process is properly cleaned-up.
200        async fn with_child(
201            config: &BrowserConfig,
202            child: &mut Child,
203        ) -> Result<(String, Connection<CdpEventMessage>)> {
204            let dur = config.launch_timeout;
205            let timeout_fut = Box::pin(tokio::time::sleep(dur));
206
207            // extract the ws:
208            let debug_ws_url = ws_url_from_output(child, timeout_fut).await?;
209            let conn = Connection::<CdpEventMessage>::connect(&debug_ws_url).await?;
210            Ok((debug_ws_url, conn))
211        }
212
213        let (debug_ws_url, conn) = match with_child(&config, &mut child).await {
214            Ok(conn) => conn,
215            Err(e) => {
216                // An initialization error occurred, clean up the process
217                if let Ok(Some(_)) = child.try_wait() {
218                    // already exited, do nothing, may happen if the browser crashed
219                } else {
220                    // the process is still alive, kill it and wait for exit (avoid zombie processes)
221                    child.kill().await.expect("`Browser::launch` failed but could not clean-up the child process (`kill`)");
222                    child.wait().await.expect("`Browser::launch` failed but could not clean-up the child process (`wait`)");
223                }
224                return Err(e);
225            }
226        };
227
228        // Only infaillible calls are allowed after this point to avoid clean-up issues with the
229        // child process.
230
231        let (tx, rx) = channel(1000);
232
233        let handler_config = HandlerConfig {
234            ignore_https_errors: config.ignore_https_errors,
235            viewport: config.viewport.clone(),
236            context_ids: Vec::new(),
237            request_timeout: config.request_timeout,
238            request_intercept: config.request_intercept,
239            cache_enabled: config.cache_enabled,
240            ignore_visuals: config.ignore_visuals,
241            ignore_stylesheets: config.ignore_stylesheets,
242            ignore_javascript: config.ignore_javascript,
243            ignore_analytics: config.ignore_analytics,
244            ignore_ads: config.ignore_ads,
245            extra_headers: config.extra_headers.clone(),
246            only_html: config.only_html,
247            service_worker_enabled: config.service_worker_enabled,
248            created_first_target: false,
249            intercept_manager: config.intercept_manager,
250        };
251
252        let fut = Handler::new(conn, rx, handler_config);
253        let browser_context = fut.default_browser_context().clone();
254
255        let browser = Self {
256            sender: tx,
257            config: Some(config),
258            child: Some(child),
259            debug_ws_url,
260            browser_context,
261        };
262
263        Ok((browser, fut))
264    }
265
266    /// Request to fetch all existing browser targets.
267    ///
268    /// By default, only targets launched after the browser connection are tracked
269    /// when connecting to a existing browser instance with the devtools websocket url
270    /// This function fetches existing targets on the browser and adds them as pages internally
271    ///
272    /// The pages are not guaranteed to be ready as soon as the function returns
273    /// You should wait a few millis if you need to use a page
274    /// Returns [TargetInfo]
275    pub async fn fetch_targets(&mut self) -> Result<Vec<TargetInfo>> {
276        let (tx, rx) = oneshot_channel();
277
278        self.sender
279            .clone()
280            .send(HandlerMessage::FetchTargets(tx))
281            .await?;
282
283        rx.await?
284    }
285
286    /// Request for the browser to close completely.
287    ///
288    /// If the browser was spawned by [`Browser::launch`], it is recommended to wait for the
289    /// spawned instance exit, to avoid "zombie" processes ([`Browser::wait`],
290    /// [`Browser::wait_sync`], [`Browser::try_wait`]).
291    /// [`Browser::drop`] waits automatically if needed.
292    pub async fn close(&self) -> Result<CloseReturns> {
293        let (tx, rx) = oneshot_channel();
294
295        self.sender
296            .clone()
297            .send(HandlerMessage::CloseBrowser(tx))
298            .await?;
299
300        rx.await?
301    }
302
303    /// Asynchronously wait for the spawned chromium instance to exit completely.
304    ///
305    /// The instance is spawned by [`Browser::launch`]. `wait` is usually called after
306    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
307    /// "zombie" processes.
308    ///
309    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
310    /// connected to an existing browser through [`Browser::connect`])
311    pub async fn wait(&mut self) -> io::Result<Option<ExitStatus>> {
312        if let Some(child) = self.child.as_mut() {
313            Ok(Some(child.wait().await?))
314        } else {
315            Ok(None)
316        }
317    }
318
319    /// If the spawned chromium instance has completely exited, wait for it.
320    ///
321    /// The instance is spawned by [`Browser::launch`]. `try_wait` is usually called after
322    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
323    /// "zombie" processes.
324    ///
325    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
326    /// connected to an existing browser through [`Browser::connect`])
327    pub fn try_wait(&mut self) -> io::Result<Option<ExitStatus>> {
328        if let Some(child) = self.child.as_mut() {
329            child.try_wait()
330        } else {
331            Ok(None)
332        }
333    }
334
335    /// Get the spawned chromium instance
336    ///
337    /// The instance is spawned by [`Browser::launch`]. The result is a [`async_process::Child`]
338    /// value. It acts as a compat wrapper for an `async-std` or `tokio` child process.
339    ///
340    /// You may use [`async_process::Child::as_mut_inner`] to retrieve the concrete implementation
341    /// for the selected runtime.
342    ///
343    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
344    /// connected to an existing browser through [`Browser::connect`])
345    pub fn get_mut_child(&mut self) -> Option<&mut Child> {
346        self.child.as_mut()
347    }
348
349    /// Has a browser instance launched on system.
350    pub fn has_child(&self) -> bool {
351        self.child.is_some()
352    }
353
354    /// Forcibly kill the spawned chromium instance
355    ///
356    /// The instance is spawned by [`Browser::launch`]. `kill` will automatically wait for the child
357    /// process to exit to avoid "zombie" processes.
358    ///
359    /// This method is provided to help if the browser does not close by itself. You should prefer
360    /// to use [`Browser::close`].
361    ///
362    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
363    /// connected to an existing browser through [`Browser::connect`])
364    pub async fn kill(&mut self) -> Option<io::Result<()>> {
365        match self.child.as_mut() {
366            Some(child) => Some(child.kill().await),
367            None => None,
368        }
369    }
370
371    /// If not launched as incognito this creates a new incognito browser
372    /// context. After that this browser exists within the incognito session.
373    /// New pages created while being in incognito mode will also run in the
374    /// incognito context. Incognito contexts won't share cookies/cache with
375    /// other browser contexts.
376    pub async fn start_incognito_context(&mut self) -> Result<&mut Self> {
377        if !self.is_incognito_configured() {
378            let browser_context_id = self
379                .create_browser_context(CreateBrowserContextParams::default())
380                .await?;
381            self.browser_context = BrowserContext::from(browser_context_id);
382            self.sender
383                .clone()
384                .send(HandlerMessage::InsertContext(self.browser_context.clone()))
385                .await?;
386        }
387
388        Ok(self)
389    }
390
391    /// If a incognito session was created with
392    /// `Browser::start_incognito_context` this disposes this context.
393    ///
394    /// # Note This will also dispose all pages that were running within the
395    /// incognito context.
396    pub async fn quit_incognito_context_base(
397        &self,
398        browser_context_id: BrowserContextId,
399    ) -> Result<&Self> {
400        self.dispose_browser_context(browser_context_id.clone())
401            .await?;
402        self.sender
403            .clone()
404            .send(HandlerMessage::DisposeContext(BrowserContext::from(
405                browser_context_id,
406            )))
407            .await?;
408        Ok(self)
409    }
410
411    /// If a incognito session was created with
412    /// `Browser::start_incognito_context` this disposes this context.
413    ///
414    /// # Note This will also dispose all pages that were running within the
415    /// incognito context.
416    pub async fn quit_incognito_context(&mut self) -> Result<&mut Self> {
417        if let Some(id) = self.browser_context.take() {
418            let _ = self.quit_incognito_context_base(id).await;
419        }
420        Ok(self)
421    }
422
423    /// Whether incognito mode was configured from the start
424    fn is_incognito_configured(&self) -> bool {
425        self.config
426            .as_ref()
427            .map(|c| c.incognito)
428            .unwrap_or_default()
429    }
430
431    /// Returns the address of the websocket this browser is attached to
432    pub fn websocket_address(&self) -> &String {
433        &self.debug_ws_url
434    }
435
436    /// Whether the BrowserContext is incognito.
437    pub fn is_incognito(&self) -> bool {
438        self.is_incognito_configured() || self.browser_context.is_incognito()
439    }
440
441    /// The config of the spawned chromium instance if any.
442    pub fn config(&self) -> Option<&BrowserConfig> {
443        self.config.as_ref()
444    }
445
446    /// Create a new browser page
447    pub async fn new_page(&self, params: impl Into<CreateTargetParams>) -> Result<Page> {
448        let (tx, rx) = oneshot_channel();
449        let mut params = params.into();
450
451        if let Some(id) = self.browser_context.id() {
452            if params.browser_context_id.is_none() {
453                params.browser_context_id = Some(id.clone());
454            }
455        }
456
457        let _ = self
458            .sender
459            .clone()
460            .send(HandlerMessage::CreatePage(params, tx))
461            .await;
462
463        rx.await?
464    }
465
466    /// Version information about the browser
467    pub async fn version(&self) -> Result<GetVersionReturns> {
468        Ok(self.execute(GetVersionParams::default()).await?.result)
469    }
470
471    /// Returns the user agent of the browser
472    pub async fn user_agent(&self) -> Result<String> {
473        Ok(self.version().await?.user_agent)
474    }
475
476    /// Call a browser method.
477    pub async fn execute<T: Command>(&self, cmd: T) -> Result<CommandResponse<T::Response>> {
478        let (tx, rx) = oneshot_channel();
479        let method = cmd.identifier();
480        let msg = CommandMessage::new(cmd, tx)?;
481
482        self.sender
483            .clone()
484            .send(HandlerMessage::Command(msg))
485            .await?;
486        let resp = rx.await??;
487        to_command_response::<T>(resp, method)
488    }
489
490    /// Return all of the pages of the browser
491    pub async fn pages(&self) -> Result<Vec<Page>> {
492        let (tx, rx) = oneshot_channel();
493        self.sender
494            .clone()
495            .send(HandlerMessage::GetPages(tx))
496            .await?;
497        Ok(rx.await?)
498    }
499
500    /// Return page of given target_id
501    pub async fn get_page(&self, target_id: TargetId) -> Result<Page> {
502        let (tx, rx) = oneshot_channel();
503        self.sender
504            .clone()
505            .send(HandlerMessage::GetPage(target_id, tx))
506            .await?;
507        rx.await?.ok_or(CdpError::NotFound)
508    }
509
510    /// Set listener for browser event
511    pub async fn event_listener<T: IntoEventKind>(&self) -> Result<EventStream<T>> {
512        let (tx, rx) = unbounded();
513        self.sender
514            .clone()
515            .send(HandlerMessage::AddEventListener(
516                EventListenerRequest::new::<T>(tx),
517            ))
518            .await?;
519
520        Ok(EventStream::new(rx))
521    }
522
523    /// Creates a new empty browser context.
524    pub async fn create_browser_context(
525        &mut self,
526        params: CreateBrowserContextParams,
527    ) -> Result<BrowserContextId> {
528        let response = self.execute(params).await?;
529        Ok(response.result.browser_context_id)
530    }
531
532    /// Send a new empty browser context.
533    pub async fn send_new_context(&mut self, browser_context_id: BrowserContextId) -> Result<()> {
534        self.browser_context = BrowserContext::from(browser_context_id);
535        self.sender
536            .clone()
537            .send(HandlerMessage::InsertContext(self.browser_context.clone()))
538            .await?;
539        Ok(())
540    }
541
542    /// Deletes a browser context.
543    pub async fn dispose_browser_context(
544        &self,
545        browser_context_id: impl Into<BrowserContextId>,
546    ) -> Result<()> {
547        self.execute(DisposeBrowserContextParams::new(browser_context_id))
548            .await?;
549
550        Ok(())
551    }
552
553    /// Clears cookies.
554    pub async fn clear_cookies(&self) -> Result<()> {
555        self.execute(ClearCookiesParams::default()).await?;
556        Ok(())
557    }
558
559    /// Returns all browser cookies.
560    pub async fn get_cookies(&self) -> Result<Vec<Cookie>> {
561        let mut cmd = GetCookiesParams::default();
562
563        cmd.browser_context_id = self.browser_context.id.clone();
564
565        Ok(self.execute(cmd).await?.result.cookies)
566    }
567
568    /// Sets given cookies.
569    pub async fn set_cookies(&self, mut cookies: Vec<CookieParam>) -> Result<&Self> {
570        for cookie in &mut cookies {
571            if let Some(url) = cookie.url.as_ref() {
572                crate::page::validate_cookie_url(url)?;
573            }
574        }
575
576        let mut cookies_param = SetCookiesParams::new(cookies);
577
578        cookies_param.browser_context_id = self.browser_context.id.clone();
579
580        self.execute(cookies_param).await?;
581        Ok(self)
582    }
583}
584
585impl Drop for Browser {
586    fn drop(&mut self) {
587        if let Some(child) = self.child.as_mut() {
588            if let Ok(Some(_)) = child.try_wait() {
589                // Already exited, do nothing. Usually occurs after using the method close or kill.
590            } else {
591                // We set the `kill_on_drop` property for the child process, so no need to explicitely
592                // kill it here. It can't really be done anyway since the method is async.
593                //
594                // On Unix, the process will be reaped in the background by the runtime automatically
595                // so it won't leave any resources locked. It is, however, a better practice for the user to
596                // do it himself since the runtime doesn't provide garantees as to when the reap occurs, so we
597                // warn him here.
598                tracing::warn!("Browser was not closed manually, it will be killed automatically in the background");
599            }
600        }
601    }
602}
603
604/// Resolve devtools WebSocket URL from the provided browser process
605///
606/// If an error occurs, it returns the browser's stderr output.
607///
608/// The URL resolution fails if:
609/// - [`CdpError::LaunchTimeout`]: `timeout_fut` completes, this corresponds to a timeout
610/// - [`CdpError::LaunchExit`]: the browser process exits (or is killed)
611/// - [`CdpError::LaunchIo`]: an input/output error occurs when await the process exit or reading
612///   the browser's stderr: end of stream, invalid UTF-8, other
613async fn ws_url_from_output(
614    child_process: &mut Child,
615    timeout_fut: impl Future<Output = ()> + Unpin,
616) -> Result<String> {
617    use futures::{AsyncBufReadExt, FutureExt};
618    let mut timeout_fut = timeout_fut.fuse();
619    let stderr = child_process.stderr.take().expect("no stderror");
620    let mut stderr_bytes = Vec::<u8>::new();
621    let mut exit_status_fut = Box::pin(child_process.wait()).fuse();
622    let mut buf = futures::io::BufReader::new(stderr);
623    loop {
624        select! {
625            _ = timeout_fut => return Err(CdpError::LaunchTimeout(BrowserStderr::new(stderr_bytes))),
626            exit_status = exit_status_fut => {
627                return Err(match exit_status {
628                    Err(e) => CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)),
629                    Ok(exit_status) => CdpError::LaunchExit(exit_status, BrowserStderr::new(stderr_bytes)),
630                })
631            },
632            read_res = buf.read_until(b'\n', &mut stderr_bytes).fuse() => {
633                match read_res {
634                    Err(e) => return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes))),
635                    Ok(byte_count) => {
636                        if byte_count == 0 {
637                            let e = io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected end of stream");
638                            return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
639                        }
640                        let start_offset = stderr_bytes.len() - byte_count;
641                        let new_bytes = &stderr_bytes[start_offset..];
642                        match std::str::from_utf8(new_bytes) {
643                            Err(_) => {
644                                let e = io::Error::new(io::ErrorKind::InvalidData, "stream did not contain valid UTF-8");
645                                return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
646                            }
647                            Ok(line) => {
648                                if let Some((_, ws)) = line.rsplit_once("listening on ") {
649                                    if ws.starts_with("ws") && ws.contains("devtools/browser") {
650                                        return Ok(ws.trim().to_string());
651                                    }
652                                }
653                            }
654                        }
655                    }
656                }
657            }
658        }
659    }
660}
661
/// The headless mode a browser is launched with.
///
/// The derived `Default` is [`HeadlessMode::True`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum HeadlessMode {
    /// The "headful" mode.
    False,
    /// The old headless mode.
    #[default]
    True,
    /// The new headless mode. See also: https://developer.chrome.com/docs/chromium/new-headless
    New,
}
672
/// Configuration of a browser instance.
///
/// Usually created through [`BrowserConfig::builder`]. Note that the derived `Default`
/// yields the plain `Default` of every field, which is not the same as the defaults chosen
/// by [`BrowserConfigBuilder`].
#[derive(Debug, Clone, Default)]
pub struct BrowserConfig {
    /// Determines whether to run headless version of the browser. Defaults to
    /// true.
    headless: HeadlessMode,
    /// Determines whether to run the browser with a sandbox.
    sandbox: bool,
    /// Launch the browser with a specific window width and height.
    window_size: Option<(u32, u32)>,
    /// Launch the browser with a specific debugging port.
    port: u16,
    /// Path for Chrome or Chromium.
    ///
    /// If unspecified, the crate will try to automatically detect a suitable
    /// binary.
    executable: std::path::PathBuf,

    /// A list of Chrome extensions to load.
    ///
    /// An extension should be a path to a folder containing the extension code.
    /// CRX files cannot be used directly and must be first extracted.
    ///
    /// Note that Chrome does not support loading extensions in headless-mode.
    /// See https://bugs.chromium.org/p/chromium/issues/detail?id=706008#c5
    extensions: Vec<String>,

    /// Environment variables to set for the Chromium process.
    /// Passes value through to std::process::Command::envs.
    pub process_envs: Option<HashMap<String, String>>,

    /// Data dir for user data
    pub user_data_dir: Option<PathBuf>,

    /// Whether to launch the `Browser` in incognito mode
    incognito: bool,

    /// Timeout duration for `Browser::launch`.
    launch_timeout: Duration,

    /// Ignore https errors; the builder default is true
    ignore_https_errors: bool,
    /// The viewport forwarded to the handler configuration, if any.
    pub viewport: Option<Viewport>,
    /// The duration after a request with no response should time out
    request_timeout: Duration,

    /// Additional command line arguments to pass to the browser instance.
    args: Vec<String>,

    /// Whether to disable DEFAULT_ARGS or not, default is false
    disable_default_args: bool,

    /// Whether to enable request interception
    pub request_intercept: bool,

    /// Whether to enable cache.
    pub cache_enabled: bool,
    /// Whether to enable/disable service workers.
    pub service_worker_enabled: bool,

    /// Whether to ignore visuals when request interception is enabled.
    pub ignore_visuals: bool,
    /// Whether to ignore stylesheets when request interception is enabled.
    pub ignore_stylesheets: bool,
    /// Whether to ignore javascript when request interception is enabled. This will allow framework JS like react to go through.
    pub ignore_javascript: bool,
    /// Whether to ignore analytics when request interception is enabled.
    pub ignore_analytics: bool,
    /// Whether to ignore ads when request interception is enabled.
    pub ignore_ads: bool,
    /// Extra headers.
    pub extra_headers: Option<std::collections::HashMap<String, String>>,
    /// Only html
    pub only_html: bool,
    /// The interception intercept manager.
    pub intercept_manager: NetworkInterceptManager,
}
749
/// Builder for a [`BrowserConfig`], obtained through [`BrowserConfig::builder`].
///
/// The fields mirror those of [`BrowserConfig`]; see that type for their meaning. The
/// initial values are set by the `Default` implementation of this builder.
#[derive(Debug, Clone)]
pub struct BrowserConfigBuilder {
    headless: HeadlessMode,
    sandbox: bool,
    window_size: Option<(u32, u32)>,
    port: u16,
    // Optional here; no executable is set by default.
    executable: Option<PathBuf>,
    // Options for detecting an executable.
    // NOTE(review): presumably consulted when `executable` is `None` — confirm in `build`.
    executation_detection: DetectionOptions,
    extensions: Vec<String>,
    process_envs: Option<HashMap<String, String>>,
    user_data_dir: Option<PathBuf>,
    incognito: bool,
    launch_timeout: Duration,
    ignore_https_errors: bool,
    viewport: Option<Viewport>,
    request_timeout: Duration,
    args: Vec<String>,
    disable_default_args: bool,
    request_intercept: bool,
    cache_enabled: bool,
    service_worker_enabled: bool,
    ignore_visuals: bool,
    ignore_ads: bool,
    ignore_javascript: bool,
    ignore_stylesheets: bool,
    ignore_analytics: bool,
    only_html: bool,
    extra_headers: Option<std::collections::HashMap<String, String>>,
    intercept_manager: NetworkInterceptManager,
}
780
781impl BrowserConfig {
782    pub fn builder() -> BrowserConfigBuilder {
783        BrowserConfigBuilder::default()
784    }
785
786    pub fn with_executable(path: impl AsRef<Path>) -> Self {
787        Self::builder().chrome_executable(path).build().unwrap()
788    }
789}
790
791impl Default for BrowserConfigBuilder {
792    fn default() -> Self {
793        Self {
794            headless: HeadlessMode::True,
795            sandbox: true,
796            window_size: None,
797            port: 0,
798            executable: None,
799            executation_detection: DetectionOptions::default(),
800            extensions: Vec::new(),
801            process_envs: None,
802            user_data_dir: None,
803            incognito: false,
804            launch_timeout: Duration::from_millis(LAUNCH_TIMEOUT),
805            ignore_https_errors: true,
806            viewport: Some(Default::default()),
807            request_timeout: Duration::from_millis(REQUEST_TIMEOUT),
808            args: Vec::new(),
809            disable_default_args: false,
810            request_intercept: false,
811            cache_enabled: true,
812            ignore_visuals: false,
813            ignore_ads: false,
814            ignore_javascript: false,
815            ignore_analytics: false,
816            ignore_stylesheets: false,
817            only_html: false,
818            extra_headers: Default::default(),
819            service_worker_enabled: true,
820            intercept_manager: NetworkInterceptManager::Unknown,
821        }
822    }
823}
824
825impl BrowserConfigBuilder {
826    pub fn window_size(mut self, width: u32, height: u32) -> Self {
827        self.window_size = Some((width, height));
828        self
829    }
830
831    pub fn no_sandbox(mut self) -> Self {
832        self.sandbox = false;
833        self
834    }
835
836    pub fn with_head(mut self) -> Self {
837        self.headless = HeadlessMode::False;
838        self
839    }
840
841    pub fn new_headless_mode(mut self) -> Self {
842        self.headless = HeadlessMode::New;
843        self
844    }
845
846    pub fn headless_mode(mut self, mode: HeadlessMode) -> Self {
847        self.headless = mode;
848        self
849    }
850
851    pub fn incognito(mut self) -> Self {
852        self.incognito = true;
853        self
854    }
855
856    pub fn respect_https_errors(mut self) -> Self {
857        self.ignore_https_errors = false;
858        self
859    }
860
861    pub fn port(mut self, port: u16) -> Self {
862        self.port = port;
863        self
864    }
865
866    pub fn launch_timeout(mut self, timeout: Duration) -> Self {
867        self.launch_timeout = timeout;
868        self
869    }
870
871    pub fn request_timeout(mut self, timeout: Duration) -> Self {
872        self.request_timeout = timeout;
873        self
874    }
875
876    /// Configures the viewport of the browser, which defaults to `800x600`.
877    /// `None` disables viewport emulation (i.e., it uses the browsers default
878    /// configuration, which fills the available space. This is similar to what
879    /// Playwright does when you provide `null` as the value of its `viewport`
880    /// option).
881    pub fn viewport(mut self, viewport: impl Into<Option<Viewport>>) -> Self {
882        self.viewport = viewport.into();
883        self
884    }
885
886    pub fn user_data_dir(mut self, data_dir: impl AsRef<Path>) -> Self {
887        self.user_data_dir = Some(data_dir.as_ref().to_path_buf());
888        self
889    }
890
891    pub fn chrome_executable(mut self, path: impl AsRef<Path>) -> Self {
892        self.executable = Some(path.as_ref().to_path_buf());
893        self
894    }
895
896    pub fn chrome_detection(mut self, options: DetectionOptions) -> Self {
897        self.executation_detection = options;
898        self
899    }
900
901    pub fn extension(mut self, extension: impl Into<String>) -> Self {
902        self.extensions.push(extension.into());
903        self
904    }
905
906    pub fn extensions<I, S>(mut self, extensions: I) -> Self
907    where
908        I: IntoIterator<Item = S>,
909        S: Into<String>,
910    {
911        for ext in extensions {
912            self.extensions.push(ext.into());
913        }
914        self
915    }
916
917    pub fn env(mut self, key: impl Into<String>, val: impl Into<String>) -> Self {
918        self.process_envs
919            .get_or_insert(HashMap::new())
920            .insert(key.into(), val.into());
921        self
922    }
923
924    pub fn envs<I, K, V>(mut self, envs: I) -> Self
925    where
926        I: IntoIterator<Item = (K, V)>,
927        K: Into<String>,
928        V: Into<String>,
929    {
930        self.process_envs
931            .get_or_insert(HashMap::new())
932            .extend(envs.into_iter().map(|(k, v)| (k.into(), v.into())));
933        self
934    }
935
936    pub fn arg(mut self, arg: impl Into<String>) -> Self {
937        self.args.push(arg.into());
938        self
939    }
940
941    pub fn args<I, S>(mut self, args: I) -> Self
942    where
943        I: IntoIterator<Item = S>,
944        S: Into<String>,
945    {
946        for arg in args {
947            self.args.push(arg.into());
948        }
949        self
950    }
951
952    pub fn disable_default_args(mut self) -> Self {
953        self.disable_default_args = true;
954        self
955    }
956
957    pub fn enable_request_intercept(mut self) -> Self {
958        self.request_intercept = true;
959        self
960    }
961
962    pub fn disable_request_intercept(mut self) -> Self {
963        self.request_intercept = false;
964        self
965    }
966
967    pub fn enable_cache(mut self) -> Self {
968        self.cache_enabled = true;
969        self
970    }
971
972    pub fn disable_cache(mut self) -> Self {
973        self.cache_enabled = false;
974        self
975    }
976
977    pub fn set_service_worker_enabled(mut self, bypass: bool) -> Self {
978        self.service_worker_enabled = bypass;
979        self
980    }
981
982    pub fn set_extra_headers(
983        mut self,
984        headers: Option<std::collections::HashMap<String, String>>,
985    ) -> Self {
986        self.extra_headers = headers;
987        self
988    }
989
990    pub fn build(self) -> std::result::Result<BrowserConfig, String> {
991        let executable = if let Some(e) = self.executable {
992            e
993        } else {
994            detection::default_executable(self.executation_detection)?
995        };
996
997        Ok(BrowserConfig {
998            headless: self.headless,
999            sandbox: self.sandbox,
1000            window_size: self.window_size,
1001            port: self.port,
1002            executable,
1003            extensions: self.extensions,
1004            process_envs: self.process_envs,
1005            user_data_dir: self.user_data_dir,
1006            incognito: self.incognito,
1007            launch_timeout: self.launch_timeout,
1008            ignore_https_errors: self.ignore_https_errors,
1009            viewport: self.viewport,
1010            request_timeout: self.request_timeout,
1011            args: self.args,
1012            disable_default_args: self.disable_default_args,
1013            request_intercept: self.request_intercept,
1014            cache_enabled: self.cache_enabled,
1015            ignore_visuals: self.ignore_visuals,
1016            ignore_ads: self.ignore_ads,
1017            ignore_javascript: self.ignore_javascript,
1018            ignore_analytics: self.ignore_analytics,
1019            ignore_stylesheets: self.ignore_stylesheets,
1020            extra_headers: self.extra_headers,
1021            only_html: self.only_html,
1022            intercept_manager: self.intercept_manager,
1023            service_worker_enabled: self.service_worker_enabled,
1024        })
1025    }
1026}
1027
1028impl BrowserConfig {
1029    pub fn launch(&self) -> io::Result<Child> {
1030        let mut cmd = async_process::Command::new(&self.executable);
1031
1032        if self.disable_default_args {
1033            cmd.args(&self.args);
1034        } else {
1035            cmd.args(DEFAULT_ARGS).args(&self.args);
1036        }
1037
1038        if !self
1039            .args
1040            .iter()
1041            .any(|arg| arg.contains("--remote-debugging-port="))
1042        {
1043            cmd.arg(format!("--remote-debugging-port={}", self.port));
1044        }
1045
1046        cmd.args(
1047            self.extensions
1048                .iter()
1049                .map(|e| format!("--load-extension={e}")),
1050        );
1051
1052        if let Some(ref user_data) = self.user_data_dir {
1053            cmd.arg(format!("--user-data-dir={}", user_data.display()));
1054        } else {
1055            // If the user did not specify a data directory, this would default to the systems default
1056            // data directory. In most cases, we would rather have a fresh instance of Chromium. Specify
1057            // a temp dir just for chromiumoxide instead.
1058            cmd.arg(format!(
1059                "--user-data-dir={}",
1060                std::env::temp_dir().join("chromiumoxide-runner").display()
1061            ));
1062        }
1063
1064        if let Some((width, height)) = self.window_size {
1065            cmd.arg(format!("--window-size={width},{height}"));
1066        }
1067
1068        if !self.sandbox {
1069            cmd.args(["--no-sandbox", "--disable-setuid-sandbox"]);
1070        }
1071
1072        match self.headless {
1073            HeadlessMode::False => (),
1074            HeadlessMode::True => {
1075                cmd.args(["--headless", "--hide-scrollbars", "--mute-audio"]);
1076            }
1077            HeadlessMode::New => {
1078                cmd.args(["--headless=new", "--hide-scrollbars", "--mute-audio"]);
1079            }
1080        }
1081
1082        if self.incognito {
1083            cmd.arg("--incognito");
1084        }
1085
1086        if let Some(ref envs) = self.process_envs {
1087            cmd.envs(envs);
1088        }
1089        cmd.stderr(Stdio::piped()).spawn()
1090    }
1091}
1092
1093/// Returns the path to Chrome's executable.
1094///
1095/// If the `CHROME` environment variable is set, `default_executable` will
1096/// use it as the default path. Otherwise, the filenames `google-chrome-stable`
1097/// `chromium`, `chromium-browser`, `chrome` and `chrome-browser` are
1098/// searched for in standard places. If that fails,
1099/// `/Applications/Google Chrome.app/...` (on MacOS) or the registry (on
1100/// Windows) is consulted. If all of the above fail, an error is returned.
1101#[deprecated(note = "Use detection::default_executable instead")]
1102pub fn default_executable() -> Result<std::path::PathBuf, String> {
1103    let options = DetectionOptions {
1104        msedge: false,
1105        unstable: false,
1106    };
1107    detection::default_executable(options)
1108}
1109
/// Command-line switches passed to the Chrome binary by default.
///
/// `BrowserConfig::launch` puts them in front of the user-supplied arguments
/// unless `disable_default_args` is set on the configuration.
///
/// Via <https://github.com/puppeteer/puppeteer/blob/4846b8723cf20d3551c0d755df394cc5e0c82a94/src/node/Launcher.ts#L157>
static DEFAULT_ARGS: [&str; 25] = [
    "--disable-background-networking",
    "--enable-features=NetworkService,NetworkServiceInProcess",
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-breakpad",
    "--disable-client-side-phishing-detection",
    "--disable-component-extensions-with-background-pages",
    "--disable-default-apps",
    "--disable-dev-shm-usage",
    // NOTE(review): this switch is sent together with any `--load-extension`
    // switch added by `launch` — confirm that configured extensions still load.
    "--disable-extensions",
    "--disable-features=TranslateUI",
    "--disable-hang-monitor",
    "--disable-ipc-flooding-protection",
    "--disable-popup-blocking",
    "--disable-prompt-on-repost",
    "--disable-renderer-backgrounding",
    "--disable-sync",
    "--force-color-profile=srgb",
    "--metrics-recording-only",
    "--no-first-run",
    "--enable-automation",
    "--password-store=basic",
    "--use-mock-keychain",
    "--enable-blink-features=IdleDetection",
    "--lang=en_US",
];