chromiumoxide/
browser.rs

1use hashbrown::HashMap;
2use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE};
3use std::future::Future;
4use std::time::Duration;
5use std::{
6    io,
7    path::{Path, PathBuf},
8};
9
10use futures::channel::mpsc::{channel, unbounded, Sender};
11use futures::channel::oneshot::channel as oneshot_channel;
12use futures::select;
13use futures::SinkExt;
14
15use crate::async_process::{self, Child, ExitStatus, Stdio};
16use crate::cmd::{to_command_response, CommandMessage};
17use crate::conn::Connection;
18use crate::detection::{self, DetectionOptions};
19use crate::error::{BrowserStderr, CdpError, Result};
20use crate::handler::blockers::intercept_manager::NetworkInterceptManager;
21use crate::handler::browser::BrowserContext;
22use crate::handler::viewport::Viewport;
23use crate::handler::{Handler, HandlerConfig, HandlerMessage, REQUEST_TIMEOUT};
24use crate::listeners::{EventListenerRequest, EventStream};
25use crate::page::Page;
26use crate::utils;
27use chromiumoxide_cdp::cdp::browser_protocol::browser::{
28    BrowserContextId, CloseReturns, GetVersionParams, GetVersionReturns,
29};
30use chromiumoxide_cdp::cdp::browser_protocol::network::{Cookie, CookieParam};
31use chromiumoxide_cdp::cdp::browser_protocol::storage::{
32    ClearCookiesParams, GetCookiesParams, SetCookiesParams,
33};
34use chromiumoxide_cdp::cdp::browser_protocol::target::{
35    CreateBrowserContextParams, CreateTargetParams, DisposeBrowserContextParams, TargetId,
36    TargetInfo,
37};
38use chromiumoxide_cdp::cdp::{CdpEventMessage, IntoEventKind};
39use chromiumoxide_types::*;
40
/// Default `Browser::launch` timeout, in milliseconds.
pub const LAUNCH_TIMEOUT: u64 = 20_000;
43
44lazy_static::lazy_static! {
45    /// The request client to get the web socket url.
46    static ref REQUEST_CLIENT: reqwest::Client = reqwest::Client::builder()
47        .timeout(Duration::from_secs(60))
48        .default_headers({
49            let mut m = HeaderMap::new();
50
51            m.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
52
53            m
54        })
55        .build()
56        .unwrap();
57}
58
/// A [`Browser`] is created when chromiumoxide connects to a Chromium instance.
///
/// It is obtained either by attaching to a running instance ([`Browser::connect`]) or by
/// spawning a new one ([`Browser::launch`]).
#[derive(Debug)]
pub struct Browser {
    /// The `Sender` to send messages to the connection handler that drives the
    /// websocket
    sender: Sender<HandlerMessage>,
    /// How the spawned chromium instance was configured, if any.
    ///
    /// `None` when the browser was attached to through [`Browser::connect`].
    config: Option<BrowserConfig>,
    /// The spawned chromium instance.
    ///
    /// `None` when the browser was attached to through [`Browser::connect`].
    child: Option<Child>,
    /// The debug web socket url of the chromium instance
    debug_ws_url: String,
    /// The browser context this browser currently operates in
    pub browser_context: BrowserContext,
}
74
/// Browser connection information.
///
/// Deserialized from the JSON body returned by the browser's `json/version` endpoint; any
/// field missing from a defaulted value is an empty string.
#[derive(serde::Deserialize, Debug, Default)]
pub struct BrowserConnection {
    /// The browser name
    #[serde(rename = "Browser")]
    pub browser: String,
    /// The devtools protocol version
    #[serde(rename = "Protocol-Version")]
    pub protocol_version: String,
    /// User Agent used by default.
    #[serde(rename = "User-Agent")]
    pub user_agent: String,
    /// The v8 engine version
    #[serde(rename = "V8-Version")]
    pub v8_version: String,
    /// Webkit version
    #[serde(rename = "WebKit-Version")]
    pub webkit_version: String,
    /// Remote debugging address (the web socket url to connect to)
    #[serde(rename = "webSocketDebuggerUrl")]
    pub web_socket_debugger_url: String,
}
97
98impl Browser {
99    /// Connect to an already running chromium instance via the given URL.
100    ///
101    /// If the URL is a http(s) URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
102    pub async fn connect(url: impl Into<String>) -> Result<(Self, Handler)> {
103        Self::connect_with_config(url, HandlerConfig::default()).await
104    }
105
106    // Connect to an already running chromium instance with a given `HandlerConfig`.
107    ///
108    /// If the URL is a http URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
109    pub async fn connect_with_config(
110        url: impl Into<String>,
111        config: HandlerConfig,
112    ) -> Result<(Self, Handler)> {
113        let mut debug_ws_url = url.into();
114
115        if debug_ws_url.starts_with("http") {
116            match REQUEST_CLIENT
117                .get(
118                    if debug_ws_url.ends_with("/json/version")
119                        || debug_ws_url.ends_with("/json/version/")
120                    {
121                        debug_ws_url.to_owned()
122                    } else {
123                        format!(
124                            "{}{}json/version",
125                            &debug_ws_url,
126                            if debug_ws_url.ends_with('/') { "" } else { "/" }
127                        )
128                    },
129                )
130                .send()
131                .await
132            {
133                Ok(req) => {
134                    let connection: BrowserConnection =
135                        crate::serde_json::from_slice(&req.bytes().await.unwrap_or_default())
136                            .unwrap_or_default();
137                    if !connection.web_socket_debugger_url.is_empty() {
138                        debug_ws_url = connection.web_socket_debugger_url;
139                    }
140                }
141                Err(_) => return Err(CdpError::NoResponse),
142            }
143        }
144
145        let conn = Connection::<CdpEventMessage>::connect(&debug_ws_url).await?;
146
147        let (tx, rx) = channel(1000);
148
149        let fut = Handler::new(conn, rx, config);
150        let browser_context = fut.default_browser_context().clone();
151
152        let browser = Self {
153            sender: tx,
154            config: None,
155            child: None,
156            debug_ws_url,
157            browser_context,
158        };
159
160        Ok((browser, fut))
161    }
162
163    /// Launches a new instance of `chromium` in the background and attaches to
164    /// its debug web socket.
165    ///
166    /// This fails when no chromium executable could be detected.
167    ///
168    /// This fails if no web socket url could be detected from the child
169    /// processes stderr for more than the configured `launch_timeout`
170    /// (20 seconds by default).
171    pub async fn launch(mut config: BrowserConfig) -> Result<(Self, Handler)> {
172        // Canonalize paths to reduce issues with sandboxing
173        config.executable = utils::canonicalize_except_snap(config.executable).await?;
174
175        // Launch a new chromium instance
176        let mut child = config.launch()?;
177
178        /// Faillible initialization to run once the child process is created.
179        ///
180        /// All faillible calls must be executed inside this function. This ensures that all
181        /// errors are caught and that the child process is properly cleaned-up.
182        async fn with_child(
183            config: &BrowserConfig,
184            child: &mut Child,
185        ) -> Result<(String, Connection<CdpEventMessage>)> {
186            let dur = config.launch_timeout;
187            let timeout_fut = Box::pin(tokio::time::sleep(dur));
188
189            // extract the ws:
190            let debug_ws_url = ws_url_from_output(child, timeout_fut).await?;
191            let conn = Connection::<CdpEventMessage>::connect(&debug_ws_url).await?;
192            Ok((debug_ws_url, conn))
193        }
194
195        let (debug_ws_url, conn) = match with_child(&config, &mut child).await {
196            Ok(conn) => conn,
197            Err(e) => {
198                // An initialization error occurred, clean up the process
199                if let Ok(Some(_)) = child.try_wait() {
200                    // already exited, do nothing, may happen if the browser crashed
201                } else {
202                    // the process is still alive, kill it and wait for exit (avoid zombie processes)
203                    child.kill().await.expect("`Browser::launch` failed but could not clean-up the child process (`kill`)");
204                    child.wait().await.expect("`Browser::launch` failed but could not clean-up the child process (`wait`)");
205                }
206                return Err(e);
207            }
208        };
209
210        // Only infaillible calls are allowed after this point to avoid clean-up issues with the
211        // child process.
212
213        let (tx, rx) = channel(1000);
214
215        let handler_config = HandlerConfig {
216            ignore_https_errors: config.ignore_https_errors,
217            viewport: config.viewport.clone(),
218            context_ids: Vec::new(),
219            request_timeout: config.request_timeout,
220            request_intercept: config.request_intercept,
221            cache_enabled: config.cache_enabled,
222            ignore_visuals: config.ignore_visuals,
223            ignore_stylesheets: config.ignore_stylesheets,
224            ignore_javascript: config.ignore_javascript,
225            ignore_analytics: config.ignore_analytics,
226            ignore_ads: config.ignore_ads,
227            extra_headers: config.extra_headers.clone(),
228            only_html: config.only_html,
229            service_worker_enabled: config.service_worker_enabled,
230            created_first_target: false,
231            intercept_manager: config.intercept_manager,
232        };
233
234        let fut = Handler::new(conn, rx, handler_config);
235        let browser_context = fut.default_browser_context().clone();
236
237        let browser = Self {
238            sender: tx,
239            config: Some(config),
240            child: Some(child),
241            debug_ws_url,
242            browser_context,
243        };
244
245        Ok((browser, fut))
246    }
247
248    /// Request to fetch all existing browser targets.
249    ///
250    /// By default, only targets launched after the browser connection are tracked
251    /// when connecting to a existing browser instance with the devtools websocket url
252    /// This function fetches existing targets on the browser and adds them as pages internally
253    ///
254    /// The pages are not guaranteed to be ready as soon as the function returns
255    /// You should wait a few millis if you need to use a page
256    /// Returns [TargetInfo]
257    pub async fn fetch_targets(&mut self) -> Result<Vec<TargetInfo>> {
258        let (tx, rx) = oneshot_channel();
259
260        self.sender
261            .clone()
262            .send(HandlerMessage::FetchTargets(tx))
263            .await?;
264
265        rx.await?
266    }
267
268    /// Request for the browser to close completely.
269    ///
270    /// If the browser was spawned by [`Browser::launch`], it is recommended to wait for the
271    /// spawned instance exit, to avoid "zombie" processes ([`Browser::wait`],
272    /// [`Browser::wait_sync`], [`Browser::try_wait`]).
273    /// [`Browser::drop`] waits automatically if needed.
274    pub async fn close(&mut self) -> Result<CloseReturns> {
275        let (tx, rx) = oneshot_channel();
276
277        self.sender
278            .clone()
279            .send(HandlerMessage::CloseBrowser(tx))
280            .await?;
281
282        rx.await?
283    }
284
285    /// Asynchronously wait for the spawned chromium instance to exit completely.
286    ///
287    /// The instance is spawned by [`Browser::launch`]. `wait` is usually called after
288    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
289    /// "zombie" processes.
290    ///
291    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
292    /// connected to an existing browser through [`Browser::connect`])
293    pub async fn wait(&mut self) -> io::Result<Option<ExitStatus>> {
294        if let Some(child) = self.child.as_mut() {
295            Ok(Some(child.wait().await?))
296        } else {
297            Ok(None)
298        }
299    }
300
301    /// If the spawned chromium instance has completely exited, wait for it.
302    ///
303    /// The instance is spawned by [`Browser::launch`]. `try_wait` is usually called after
304    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
305    /// "zombie" processes.
306    ///
307    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
308    /// connected to an existing browser through [`Browser::connect`])
309    pub fn try_wait(&mut self) -> io::Result<Option<ExitStatus>> {
310        if let Some(child) = self.child.as_mut() {
311            child.try_wait()
312        } else {
313            Ok(None)
314        }
315    }
316
    /// Get the spawned chromium instance
    ///
    /// The instance is spawned by [`Browser::launch`]. The result is a [`async_process::Child`]
    /// value. It acts as a compat wrapper for an `async-std` or `tokio` child process.
    ///
    /// You may use [`async_process::Child::as_mut_inner`] to retrieve the concrete implementation
    /// for the selected runtime.
    ///
    /// Returns `None` if this [`Browser`] did not spawn any chromium instance (e.g.
    /// connected to an existing browser through [`Browser::connect`])
    pub fn get_mut_child(&mut self) -> Option<&mut Child> {
        self.child.as_mut()
    }
330
    /// Whether this [`Browser`] holds a chromium process it spawned itself.
    ///
    /// This is `true` for browsers created by [`Browser::launch`] and `false` for browsers
    /// attached to through [`Browser::connect`].
    pub fn has_child(&self) -> bool {
        self.child.is_some()
    }
335
336    /// Forcibly kill the spawned chromium instance
337    ///
338    /// The instance is spawned by [`Browser::launch`]. `kill` will automatically wait for the child
339    /// process to exit to avoid "zombie" processes.
340    ///
341    /// This method is provided to help if the browser does not close by itself. You should prefer
342    /// to use [`Browser::close`].
343    ///
344    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
345    /// connected to an existing browser through [`Browser::connect`])
346    pub async fn kill(&mut self) -> Option<io::Result<()>> {
347        match self.child.as_mut() {
348            Some(child) => Some(child.kill().await),
349            None => None,
350        }
351    }
352
353    /// If not launched as incognito this creates a new incognito browser
354    /// context. After that this browser exists within the incognito session.
355    /// New pages created while being in incognito mode will also run in the
356    /// incognito context. Incognito contexts won't share cookies/cache with
357    /// other browser contexts.
358    pub async fn start_incognito_context(&mut self) -> Result<&mut Self> {
359        if !self.is_incognito_configured() {
360            let browser_context_id = self
361                .create_browser_context(CreateBrowserContextParams::default())
362                .await?;
363            self.browser_context = BrowserContext::from(browser_context_id);
364            self.sender
365                .clone()
366                .send(HandlerMessage::InsertContext(self.browser_context.clone()))
367                .await?;
368        }
369
370        Ok(self)
371    }
372
373    /// If a incognito session was created with
374    /// `Browser::start_incognito_context` this disposes this context.
375    ///
376    /// # Note This will also dispose all pages that were running within the
377    /// incognito context.
378    pub async fn quit_incognito_context(&mut self) -> Result<&mut Self> {
379        if let Some(id) = self.browser_context.take() {
380            self.dispose_browser_context(id.clone()).await?;
381            self.sender
382                .clone()
383                .send(HandlerMessage::DisposeContext(BrowserContext::from(id)))
384                .await?;
385        }
386        Ok(self)
387    }
388
389    /// Whether incognito mode was configured from the start
390    fn is_incognito_configured(&self) -> bool {
391        self.config
392            .as_ref()
393            .map(|c| c.incognito)
394            .unwrap_or_default()
395    }
396
    /// Returns the address of the websocket this browser is attached to.
    ///
    /// This is the debug web socket url the connection was established with.
    pub fn websocket_address(&self) -> &String {
        &self.debug_ws_url
    }
401
    /// Whether the BrowserContext is incognito.
    ///
    /// True if incognito was configured at launch, or if the current browser context
    /// reports itself as incognito.
    pub fn is_incognito(&self) -> bool {
        self.is_incognito_configured() || self.browser_context.is_incognito()
    }
406
    /// The config of the spawned chromium instance if any.
    ///
    /// `None` when the browser was attached to through [`Browser::connect`].
    pub fn config(&self) -> Option<&BrowserConfig> {
        self.config.as_ref()
    }
411
412    /// Create a new browser page
413    pub async fn new_page(&self, params: impl Into<CreateTargetParams>) -> Result<Page> {
414        let (tx, rx) = oneshot_channel();
415        let mut params = params.into();
416
417        if let Some(id) = self.browser_context.id() {
418            if params.browser_context_id.is_none() {
419                params.browser_context_id = Some(id.clone());
420            }
421        }
422
423        let _ = self
424            .sender
425            .clone()
426            .send(HandlerMessage::CreatePage(params, tx))
427            .await;
428
429        rx.await?
430    }
431
432    /// Version information about the browser
433    pub async fn version(&self) -> Result<GetVersionReturns> {
434        Ok(self.execute(GetVersionParams::default()).await?.result)
435    }
436
437    /// Returns the user agent of the browser
438    pub async fn user_agent(&self) -> Result<String> {
439        Ok(self.version().await?.user_agent)
440    }
441
442    /// Call a browser method.
443    pub async fn execute<T: Command>(&self, cmd: T) -> Result<CommandResponse<T::Response>> {
444        let (tx, rx) = oneshot_channel();
445        let method = cmd.identifier();
446        let msg = CommandMessage::new(cmd, tx)?;
447
448        self.sender
449            .clone()
450            .send(HandlerMessage::Command(msg))
451            .await?;
452        let resp = rx.await??;
453        to_command_response::<T>(resp, method)
454    }
455
456    /// Return all of the pages of the browser
457    pub async fn pages(&self) -> Result<Vec<Page>> {
458        let (tx, rx) = oneshot_channel();
459        self.sender
460            .clone()
461            .send(HandlerMessage::GetPages(tx))
462            .await?;
463        Ok(rx.await?)
464    }
465
466    /// Return page of given target_id
467    pub async fn get_page(&self, target_id: TargetId) -> Result<Page> {
468        let (tx, rx) = oneshot_channel();
469        self.sender
470            .clone()
471            .send(HandlerMessage::GetPage(target_id, tx))
472            .await?;
473        rx.await?.ok_or(CdpError::NotFound)
474    }
475
476    /// Set listener for browser event
477    pub async fn event_listener<T: IntoEventKind>(&self) -> Result<EventStream<T>> {
478        let (tx, rx) = unbounded();
479        self.sender
480            .clone()
481            .send(HandlerMessage::AddEventListener(
482                EventListenerRequest::new::<T>(tx),
483            ))
484            .await?;
485
486        Ok(EventStream::new(rx))
487    }
488
489    /// Creates a new empty browser context.
490    pub async fn create_browser_context(
491        &mut self,
492        params: CreateBrowserContextParams,
493    ) -> Result<BrowserContextId> {
494        let response = self.execute(params).await?;
495        Ok(response.result.browser_context_id)
496    }
497
498    /// Send a new empty browser context.
499    pub async fn send_new_context(&mut self, browser_context_id: BrowserContextId) -> Result<()> {
500        self.browser_context = BrowserContext::from(browser_context_id);
501        self.sender
502            .clone()
503            .send(HandlerMessage::InsertContext(self.browser_context.clone()))
504            .await?;
505        Ok(())
506    }
507
508    /// Deletes a browser context.
509    pub async fn dispose_browser_context(
510        &self,
511        browser_context_id: impl Into<BrowserContextId>,
512    ) -> Result<()> {
513        self.execute(DisposeBrowserContextParams::new(browser_context_id))
514            .await?;
515
516        Ok(())
517    }
518
519    /// Clears cookies.
520    pub async fn clear_cookies(&self) -> Result<()> {
521        self.execute(ClearCookiesParams::default()).await?;
522        Ok(())
523    }
524
525    /// Returns all browser cookies.
526    pub async fn get_cookies(&self) -> Result<Vec<Cookie>> {
527        let mut cmd = GetCookiesParams::default();
528
529        cmd.browser_context_id = self.browser_context.id.clone();
530
531        Ok(self.execute(cmd).await?.result.cookies)
532    }
533
534    /// Sets given cookies.
535    pub async fn set_cookies(&self, mut cookies: Vec<CookieParam>) -> Result<&Self> {
536        for cookie in &mut cookies {
537            if let Some(url) = cookie.url.as_ref() {
538                crate::page::validate_cookie_url(url)?;
539            }
540        }
541
542        let mut cookies_param = SetCookiesParams::new(cookies);
543
544        cookies_param.browser_context_id = self.browser_context.id.clone();
545
546        self.execute(cookies_param).await?;
547        Ok(self)
548    }
549}
550
551impl Drop for Browser {
552    fn drop(&mut self) {
553        if let Some(child) = self.child.as_mut() {
554            if let Ok(Some(_)) = child.try_wait() {
555                // Already exited, do nothing. Usually occurs after using the method close or kill.
556            } else {
557                // We set the `kill_on_drop` property for the child process, so no need to explicitely
558                // kill it here. It can't really be done anyway since the method is async.
559                //
560                // On Unix, the process will be reaped in the background by the runtime automatically
561                // so it won't leave any resources locked. It is, however, a better practice for the user to
562                // do it himself since the runtime doesn't provide garantees as to when the reap occurs, so we
563                // warn him here.
564                tracing::warn!("Browser was not closed manually, it will be killed automatically in the background");
565            }
566        }
567    }
568}
569
/// Resolve devtools WebSocket URL from the provided browser process
///
/// The browser's stderr is read line by line until a line announces the URL, i.e. contains
/// `listening on ` followed by a `ws...` address that includes `devtools/browser`.
///
/// If an error occurs, it returns the browser's stderr output collected so far.
///
/// The URL resolution fails if:
/// - [`CdpError::LaunchTimeout`]: `timeout_fut` completes, this corresponds to a timeout
/// - [`CdpError::LaunchExit`]: the browser process exits (or is killed)
/// - [`CdpError::LaunchIo`]: an input/output error occurs when awaiting the process exit or
///   reading the browser's stderr: end of stream, invalid UTF-8, other
///
/// # Panics
///
/// Panics if the child's stderr handle is not available (e.g. it was already taken).
async fn ws_url_from_output(
    child_process: &mut Child,
    timeout_fut: impl Future<Output = ()> + Unpin,
) -> Result<String> {
    use futures::{AsyncBufReadExt, FutureExt};
    let mut timeout_fut = timeout_fut.fuse();
    let stderr = child_process.stderr.take().expect("no stderror");
    // Accumulates everything read from stderr so it can be attached to any error.
    let mut stderr_bytes = Vec::<u8>::new();
    let mut exit_status_fut = Box::pin(child_process.wait()).fuse();
    let mut buf = futures::io::BufReader::new(stderr);
    loop {
        // Race the timeout, the process exit and the next stderr line; only a completed
        // line read without a match lets the loop continue.
        select! {
            _ = timeout_fut => return Err(CdpError::LaunchTimeout(BrowserStderr::new(stderr_bytes))),
            exit_status = exit_status_fut => {
                return Err(match exit_status {
                    Err(e) => CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)),
                    Ok(exit_status) => CdpError::LaunchExit(exit_status, BrowserStderr::new(stderr_bytes)),
                })
            },
            read_res = buf.read_until(b'\n', &mut stderr_bytes).fuse() => {
                match read_res {
                    Err(e) => return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes))),
                    Ok(byte_count) => {
                        // Zero bytes read means stderr was closed before the URL showed up.
                        if byte_count == 0 {
                            let e = io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected end of stream");
                            return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
                        }
                        // Only inspect the line that was just appended to the buffer.
                        let start_offset = stderr_bytes.len() - byte_count;
                        let new_bytes = &stderr_bytes[start_offset..];
                        match std::str::from_utf8(new_bytes) {
                            Err(_) => {
                                let e = io::Error::new(io::ErrorKind::InvalidData, "stream did not contain valid UTF-8");
                                return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
                            }
                            Ok(line) => {
                                // Everything after the last "listening on " is the candidate URL.
                                if let Some((_, ws)) = line.rsplit_once("listening on ") {
                                    if ws.starts_with("ws") && ws.contains("devtools/browser") {
                                        return Ok(ws.trim().to_string());
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
627
/// Selects whether the browser runs with a visible window or in one of the headless modes.
///
/// The default is [`HeadlessMode::True`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum HeadlessMode {
    /// The "headful" mode.
    False,
    /// The old headless mode.
    #[default]
    True,
    /// The new headless mode. See also: <https://developer.chrome.com/docs/chromium/new-headless>
    New,
}
638
/// Configuration of a chromium instance spawned by [`Browser::launch`].
///
/// Usually created through [`BrowserConfig::builder`].
#[derive(Debug, Clone)]
pub struct BrowserConfig {
    /// Determines whether to run headless version of the browser. Defaults to
    /// true.
    headless: HeadlessMode,
    /// Determines whether to run the browser with a sandbox.
    sandbox: bool,
    /// Launch the browser with a specific window width and height.
    window_size: Option<(u32, u32)>,
    /// Launch the browser with a specific debugging port.
    port: u16,
    /// Path for Chrome or Chromium.
    ///
    /// If unspecified, the crate will try to automatically detect a suitable
    /// binary.
    executable: std::path::PathBuf,

    /// A list of Chrome extensions to load.
    ///
    /// An extension should be a path to a folder containing the extension code.
    /// CRX files cannot be used directly and must be first extracted.
    ///
    /// Note that Chrome does not support loading extensions in headless-mode.
    /// See <https://bugs.chromium.org/p/chromium/issues/detail?id=706008#c5>
    extensions: Vec<String>,

    /// Environment variables to set for the Chromium process.
    /// Passes value through to std::process::Command::envs.
    pub process_envs: Option<HashMap<String, String>>,

    /// Data dir for user data
    pub user_data_dir: Option<PathBuf>,

    /// Whether to launch the `Browser` in incognito mode
    incognito: bool,

    /// Timeout duration for `Browser::launch`.
    launch_timeout: Duration,

    /// Ignore https errors, default is true
    ignore_https_errors: bool,
    /// Viewport handed to the connection handler; `None` disables viewport emulation.
    viewport: Option<Viewport>,
    /// The duration after a request with no response should time out
    request_timeout: Duration,

    /// Additional command line arguments to pass to the browser instance.
    args: Vec<String>,

    /// Whether to disable DEFAULT_ARGS or not, default is false
    disable_default_args: bool,

    /// Whether to enable request interception
    pub request_intercept: bool,

    /// Whether to enable cache.
    pub cache_enabled: bool,
    /// Whether to enable/disable service workers.
    pub service_worker_enabled: bool,

    /// Whether to ignore visuals when request interception is enabled.
    pub ignore_visuals: bool,
    /// Whether to ignore stylesheets when request interception is enabled.
    pub ignore_stylesheets: bool,
    /// Whether to ignore javascript when request interception is enabled. This will allow framework JS like react to go through.
    pub ignore_javascript: bool,
    /// Whether to ignore analytics when request interception is enabled.
    pub ignore_analytics: bool,
    /// Whether to ignore ads when request interception is enabled.
    pub ignore_ads: bool,
    /// Extra headers.
    pub extra_headers: Option<std::collections::HashMap<String, String>>,
    /// Only html
    pub only_html: bool,
    /// The network intercept manager handed to the connection handler.
    pub intercept_manager: NetworkInterceptManager,
}
715
/// Builder for [`BrowserConfig`].
///
/// Obtained through [`BrowserConfig::builder`] or `BrowserConfigBuilder::default()`; each
/// setter consumes the builder and returns it, so calls can be chained. The fields mirror
/// those of [`BrowserConfig`].
#[derive(Debug, Clone)]
pub struct BrowserConfigBuilder {
    headless: HeadlessMode,
    sandbox: bool,
    window_size: Option<(u32, u32)>,
    port: u16,
    // Explicit executable path; `None` until `chrome_executable` is called.
    executable: Option<PathBuf>,
    // Options set through `chrome_detection` (presumably used to locate an executable when
    // none was given explicitly; confirm against `build`).
    executation_detection: DetectionOptions,
    extensions: Vec<String>,
    process_envs: Option<HashMap<String, String>>,
    user_data_dir: Option<PathBuf>,
    incognito: bool,
    launch_timeout: Duration,
    ignore_https_errors: bool,
    viewport: Option<Viewport>,
    request_timeout: Duration,
    args: Vec<String>,
    disable_default_args: bool,
    request_intercept: bool,
    cache_enabled: bool,
    service_worker_enabled: bool,
    ignore_visuals: bool,
    ignore_ads: bool,
    ignore_javascript: bool,
    ignore_stylesheets: bool,
    ignore_analytics: bool,
    only_html: bool,
    extra_headers: Option<std::collections::HashMap<String, String>>,
    intercept_manager: NetworkInterceptManager,
}
746
747impl BrowserConfig {
748    pub fn builder() -> BrowserConfigBuilder {
749        BrowserConfigBuilder::default()
750    }
751
752    pub fn with_executable(path: impl AsRef<Path>) -> Self {
753        Self::builder().chrome_executable(path).build().unwrap()
754    }
755}
756
757impl Default for BrowserConfigBuilder {
758    fn default() -> Self {
759        Self {
760            headless: HeadlessMode::True,
761            sandbox: true,
762            window_size: None,
763            port: 0,
764            executable: None,
765            executation_detection: DetectionOptions::default(),
766            extensions: Vec::new(),
767            process_envs: None,
768            user_data_dir: None,
769            incognito: false,
770            launch_timeout: Duration::from_millis(LAUNCH_TIMEOUT),
771            ignore_https_errors: true,
772            viewport: Some(Default::default()),
773            request_timeout: Duration::from_millis(REQUEST_TIMEOUT),
774            args: Vec::new(),
775            disable_default_args: false,
776            request_intercept: false,
777            cache_enabled: true,
778            ignore_visuals: false,
779            ignore_ads: false,
780            ignore_javascript: false,
781            ignore_analytics: false,
782            ignore_stylesheets: false,
783            only_html: false,
784            extra_headers: Default::default(),
785            service_worker_enabled: true,
786            intercept_manager: NetworkInterceptManager::UNKNOWN,
787        }
788    }
789}
790
791impl BrowserConfigBuilder {
792    pub fn window_size(mut self, width: u32, height: u32) -> Self {
793        self.window_size = Some((width, height));
794        self
795    }
796
797    pub fn no_sandbox(mut self) -> Self {
798        self.sandbox = false;
799        self
800    }
801
802    pub fn with_head(mut self) -> Self {
803        self.headless = HeadlessMode::False;
804        self
805    }
806
807    pub fn new_headless_mode(mut self) -> Self {
808        self.headless = HeadlessMode::New;
809        self
810    }
811
812    pub fn headless_mode(mut self, mode: HeadlessMode) -> Self {
813        self.headless = mode;
814        self
815    }
816
817    pub fn incognito(mut self) -> Self {
818        self.incognito = true;
819        self
820    }
821
822    pub fn respect_https_errors(mut self) -> Self {
823        self.ignore_https_errors = false;
824        self
825    }
826
827    pub fn port(mut self, port: u16) -> Self {
828        self.port = port;
829        self
830    }
831
832    pub fn launch_timeout(mut self, timeout: Duration) -> Self {
833        self.launch_timeout = timeout;
834        self
835    }
836
837    pub fn request_timeout(mut self, timeout: Duration) -> Self {
838        self.request_timeout = timeout;
839        self
840    }
841
842    /// Configures the viewport of the browser, which defaults to `800x600`.
843    /// `None` disables viewport emulation (i.e., it uses the browsers default
844    /// configuration, which fills the available space. This is similar to what
845    /// Playwright does when you provide `null` as the value of its `viewport`
846    /// option).
847    pub fn viewport(mut self, viewport: impl Into<Option<Viewport>>) -> Self {
848        self.viewport = viewport.into();
849        self
850    }
851
852    pub fn user_data_dir(mut self, data_dir: impl AsRef<Path>) -> Self {
853        self.user_data_dir = Some(data_dir.as_ref().to_path_buf());
854        self
855    }
856
857    pub fn chrome_executable(mut self, path: impl AsRef<Path>) -> Self {
858        self.executable = Some(path.as_ref().to_path_buf());
859        self
860    }
861
862    pub fn chrome_detection(mut self, options: DetectionOptions) -> Self {
863        self.executation_detection = options;
864        self
865    }
866
867    pub fn extension(mut self, extension: impl Into<String>) -> Self {
868        self.extensions.push(extension.into());
869        self
870    }
871
872    pub fn extensions<I, S>(mut self, extensions: I) -> Self
873    where
874        I: IntoIterator<Item = S>,
875        S: Into<String>,
876    {
877        for ext in extensions {
878            self.extensions.push(ext.into());
879        }
880        self
881    }
882
883    pub fn env(mut self, key: impl Into<String>, val: impl Into<String>) -> Self {
884        self.process_envs
885            .get_or_insert(HashMap::new())
886            .insert(key.into(), val.into());
887        self
888    }
889
890    pub fn envs<I, K, V>(mut self, envs: I) -> Self
891    where
892        I: IntoIterator<Item = (K, V)>,
893        K: Into<String>,
894        V: Into<String>,
895    {
896        self.process_envs
897            .get_or_insert(HashMap::new())
898            .extend(envs.into_iter().map(|(k, v)| (k.into(), v.into())));
899        self
900    }
901
902    pub fn arg(mut self, arg: impl Into<String>) -> Self {
903        self.args.push(arg.into());
904        self
905    }
906
907    pub fn args<I, S>(mut self, args: I) -> Self
908    where
909        I: IntoIterator<Item = S>,
910        S: Into<String>,
911    {
912        for arg in args {
913            self.args.push(arg.into());
914        }
915        self
916    }
917
918    pub fn disable_default_args(mut self) -> Self {
919        self.disable_default_args = true;
920        self
921    }
922
923    pub fn enable_request_intercept(mut self) -> Self {
924        self.request_intercept = true;
925        self
926    }
927
928    pub fn disable_request_intercept(mut self) -> Self {
929        self.request_intercept = false;
930        self
931    }
932
933    pub fn enable_cache(mut self) -> Self {
934        self.cache_enabled = true;
935        self
936    }
937
938    pub fn disable_cache(mut self) -> Self {
939        self.cache_enabled = false;
940        self
941    }
942
943    pub fn set_service_worker_enabled(mut self, bypass: bool) -> Self {
944        self.service_worker_enabled = bypass;
945        self
946    }
947
948    pub fn set_extra_headers(
949        mut self,
950        headers: Option<std::collections::HashMap<String, String>>,
951    ) -> Self {
952        self.extra_headers = headers;
953        self
954    }
955    pub fn build(self) -> std::result::Result<BrowserConfig, String> {
956        let executable = if let Some(e) = self.executable {
957            e
958        } else {
959            detection::default_executable(self.executation_detection)?
960        };
961
962        Ok(BrowserConfig {
963            headless: self.headless,
964            sandbox: self.sandbox,
965            window_size: self.window_size,
966            port: self.port,
967            executable,
968            extensions: self.extensions,
969            process_envs: self.process_envs,
970            user_data_dir: self.user_data_dir,
971            incognito: self.incognito,
972            launch_timeout: self.launch_timeout,
973            ignore_https_errors: self.ignore_https_errors,
974            viewport: self.viewport,
975            request_timeout: self.request_timeout,
976            args: self.args,
977            disable_default_args: self.disable_default_args,
978            request_intercept: self.request_intercept,
979            cache_enabled: self.cache_enabled,
980            ignore_visuals: self.ignore_visuals,
981            ignore_ads: self.ignore_ads,
982            ignore_javascript: self.ignore_javascript,
983            ignore_analytics: self.ignore_analytics,
984            ignore_stylesheets: self.ignore_stylesheets,
985            extra_headers: self.extra_headers,
986            only_html: self.only_html,
987            intercept_manager: self.intercept_manager,
988            service_worker_enabled: self.service_worker_enabled,
989        })
990    }
991}
992
993impl BrowserConfig {
994    pub fn launch(&self) -> io::Result<Child> {
995        let mut cmd = async_process::Command::new(&self.executable);
996
997        if self.disable_default_args {
998            cmd.args(&self.args);
999        } else {
1000            cmd.args(DEFAULT_ARGS).args(&self.args);
1001        }
1002
1003        if !self
1004            .args
1005            .iter()
1006            .any(|arg| arg.contains("--remote-debugging-port="))
1007        {
1008            cmd.arg(format!("--remote-debugging-port={}", self.port));
1009        }
1010
1011        cmd.args(
1012            self.extensions
1013                .iter()
1014                .map(|e| format!("--load-extension={e}")),
1015        );
1016
1017        if let Some(ref user_data) = self.user_data_dir {
1018            cmd.arg(format!("--user-data-dir={}", user_data.display()));
1019        } else {
1020            // If the user did not specify a data directory, this would default to the systems default
1021            // data directory. In most cases, we would rather have a fresh instance of Chromium. Specify
1022            // a temp dir just for chromiumoxide instead.
1023            cmd.arg(format!(
1024                "--user-data-dir={}",
1025                std::env::temp_dir().join("chromiumoxide-runner").display()
1026            ));
1027        }
1028
1029        if let Some((width, height)) = self.window_size {
1030            cmd.arg(format!("--window-size={width},{height}"));
1031        }
1032
1033        if !self.sandbox {
1034            cmd.args(["--no-sandbox", "--disable-setuid-sandbox"]);
1035        }
1036
1037        match self.headless {
1038            HeadlessMode::False => (),
1039            HeadlessMode::True => {
1040                cmd.args(["--headless", "--hide-scrollbars", "--mute-audio"]);
1041            }
1042            HeadlessMode::New => {
1043                cmd.args(["--headless=new", "--hide-scrollbars", "--mute-audio"]);
1044            }
1045        }
1046
1047        if self.incognito {
1048            cmd.arg("--incognito");
1049        }
1050
1051        if let Some(ref envs) = self.process_envs {
1052            cmd.envs(envs);
1053        }
1054        cmd.stderr(Stdio::piped()).spawn()
1055    }
1056}
1057
1058/// Returns the path to Chrome's executable.
1059///
1060/// If the `CHROME` environment variable is set, `default_executable` will
1061/// use it as the default path. Otherwise, the filenames `google-chrome-stable`
1062/// `chromium`, `chromium-browser`, `chrome` and `chrome-browser` are
1063/// searched for in standard places. If that fails,
1064/// `/Applications/Google Chrome.app/...` (on MacOS) or the registry (on
1065/// Windows) is consulted. If all of the above fail, an error is returned.
1066#[deprecated(note = "Use detection::default_executable instead")]
1067pub fn default_executable() -> Result<std::path::PathBuf, String> {
1068    let options = DetectionOptions {
1069        msedge: false,
1070        unstable: false,
1071    };
1072    detection::default_executable(options)
1073}
1074
/// Command-line switches passed to the Chrome binary by default.
///
/// `BrowserConfig::launch` places these ahead of the user-supplied arguments
/// unless `disable_default_args` is set on the configuration.
///
/// Via <https://github.com/puppeteer/puppeteer/blob/4846b8723cf20d3551c0d755df394cc5e0c82a94/src/node/Launcher.ts#L157>
static DEFAULT_ARGS: [&str; 25] = [
    "--disable-background-networking",
    "--enable-features=NetworkService,NetworkServiceInProcess",
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-breakpad",
    "--disable-client-side-phishing-detection",
    "--disable-component-extensions-with-background-pages",
    "--disable-default-apps",
    "--disable-dev-shm-usage",
    "--disable-extensions",
    "--disable-features=TranslateUI",
    "--disable-hang-monitor",
    "--disable-ipc-flooding-protection",
    "--disable-popup-blocking",
    "--disable-prompt-on-repost",
    "--disable-renderer-backgrounding",
    "--disable-sync",
    "--force-color-profile=srgb",
    "--metrics-recording-only",
    "--no-first-run",
    "--enable-automation",
    "--password-store=basic",
    "--use-mock-keychain",
    "--enable-blink-features=IdleDetection",
    "--lang=en_US",
];