chromiumoxide/
browser.rs

1use hashbrown::HashMap;
2use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE};
3use std::future::Future;
4use std::time::Duration;
5use std::{
6    io,
7    path::{Path, PathBuf},
8};
9
10use futures::channel::mpsc::{channel, unbounded, Sender};
11use futures::channel::oneshot::channel as oneshot_channel;
12use futures::select;
13use futures::SinkExt;
14
15use crate::async_process::{self, Child, ExitStatus, Stdio};
16use crate::cmd::{to_command_response, CommandMessage};
17use crate::conn::Connection;
18use crate::detection::{self, DetectionOptions};
19use crate::error::{BrowserStderr, CdpError, Result};
20use crate::handler::blockers::intercept_manager::NetworkInterceptManager;
21use crate::handler::browser::BrowserContext;
22use crate::handler::viewport::Viewport;
23use crate::handler::{Handler, HandlerConfig, HandlerMessage, REQUEST_TIMEOUT};
24use crate::listeners::{EventListenerRequest, EventStream};
25use crate::page::Page;
26use crate::utils;
27use chromiumoxide_cdp::cdp::browser_protocol::browser::{
28    BrowserContextId, CloseReturns, GetVersionParams, GetVersionReturns,
29};
30use chromiumoxide_cdp::cdp::browser_protocol::network::{Cookie, CookieParam};
31use chromiumoxide_cdp::cdp::browser_protocol::storage::{
32    ClearCookiesParams, GetCookiesParams, SetCookiesParams,
33};
34use chromiumoxide_cdp::cdp::browser_protocol::target::{
35    CreateBrowserContextParams, CreateTargetParams, DisposeBrowserContextParams, TargetId,
36    TargetInfo,
37};
38use chromiumoxide_cdp::cdp::{CdpEventMessage, IntoEventKind};
39use chromiumoxide_types::*;
40
/// Default timeout for `Browser::launch`, expressed in milliseconds (20 seconds).
pub const LAUNCH_TIMEOUT: u64 = 20 * 1_000;
43
44lazy_static::lazy_static! {
45    /// The request client to get the web socket url.
46    static ref REQUEST_CLIENT: reqwest::Client = reqwest::Client::builder()
47        .tcp_keepalive(Duration::from_secs(30))
48        .http2_keep_alive_while_idle(true)
49        .timeout(Duration::from_secs(120))
50        .default_headers({
51            let mut m = HeaderMap::new();
52
53            m.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
54
55            m
56        })
57        .build()
58        .unwrap();
59}
60
/// A [`Browser`] is created when chromiumoxide connects to a Chromium instance.
///
/// It is obtained either by attaching to a running instance ([`Browser::connect`])
/// or by spawning a new one ([`Browser::launch`]). All requests are forwarded as
/// [`HandlerMessage`]s through `sender`.
#[derive(Debug)]
pub struct Browser {
    /// The `Sender` to send messages to the connection handler that drives the
    /// websocket
    sender: Sender<HandlerMessage>,
    /// How the spawned chromium instance was configured, if any.
    /// `None` when the browser was attached to via [`Browser::connect`].
    config: Option<BrowserConfig>,
    /// The spawned chromium instance.
    /// `None` when the browser was attached to via [`Browser::connect`].
    child: Option<Child>,
    /// The debug web socket url of the chromium instance
    debug_ws_url: String,
    /// The browser context that new pages are created in by default
    /// (see [`Browser::new_page`]).
    pub browser_context: BrowserContext,
}
76
/// Browser connection information.
///
/// This is the shape deserialized from the response of the `json/version`
/// endpoint in [`Browser::connect_with_config`]. Every field falls back to an
/// empty string through `Default` when the response cannot be parsed.
#[derive(serde::Deserialize, Debug, Default)]
pub struct BrowserConnection {
    #[serde(rename = "Browser")]
    /// The browser name
    pub browser: String,
    #[serde(rename = "Protocol-Version")]
    /// The protocol version reported by the browser
    pub protocol_version: String,
    #[serde(rename = "User-Agent")]
    /// User Agent used by default.
    pub user_agent: String,
    #[serde(rename = "V8-Version")]
    /// The v8 engine version
    pub v8_version: String,
    #[serde(rename = "WebKit-Version")]
    /// Webkit version
    pub webkit_version: String,
    #[serde(rename = "webSocketDebuggerUrl")]
    /// Remote debugging address (the web socket url to attach to)
    pub web_socket_debugger_url: String,
}
99
100impl Browser {
101    /// Connect to an already running chromium instance via the given URL.
102    ///
103    /// If the URL is a http(s) URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
104    pub async fn connect(url: impl Into<String>) -> Result<(Self, Handler)> {
105        Self::connect_with_config(url, HandlerConfig::default()).await
106    }
107
108    // Connect to an already running chromium instance with a given `HandlerConfig`.
109    ///
110    /// If the URL is a http URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
111    pub async fn connect_with_config(
112        url: impl Into<String>,
113        config: HandlerConfig,
114    ) -> Result<(Self, Handler)> {
115        let mut debug_ws_url = url.into();
116
117        if debug_ws_url.starts_with("http") {
118            match REQUEST_CLIENT
119                .get(
120                    if debug_ws_url.ends_with("/json/version")
121                        || debug_ws_url.ends_with("/json/version/")
122                    {
123                        debug_ws_url.to_owned()
124                    } else {
125                        format!(
126                            "{}{}json/version",
127                            &debug_ws_url,
128                            if debug_ws_url.ends_with('/') { "" } else { "/" }
129                        )
130                    },
131                )
132                .send()
133                .await
134            {
135                Ok(req) => {
136                    let connection: BrowserConnection =
137                        crate::serde_json::from_slice(&req.bytes().await.unwrap_or_default())
138                            .unwrap_or_default();
139                    if !connection.web_socket_debugger_url.is_empty() {
140                        debug_ws_url = connection.web_socket_debugger_url;
141                    }
142                }
143                Err(_) => return Err(CdpError::NoResponse),
144            }
145        }
146
147        let conn = Connection::<CdpEventMessage>::connect(&debug_ws_url).await?;
148
149        let (tx, rx) = channel(1000);
150
151        let fut = Handler::new(conn, rx, config);
152        let browser_context = fut.default_browser_context().clone();
153
154        let browser = Self {
155            sender: tx,
156            config: None,
157            child: None,
158            debug_ws_url,
159            browser_context,
160        };
161
162        Ok((browser, fut))
163    }
164
165    /// Launches a new instance of `chromium` in the background and attaches to
166    /// its debug web socket.
167    ///
168    /// This fails when no chromium executable could be detected.
169    ///
170    /// This fails if no web socket url could be detected from the child
171    /// processes stderr for more than the configured `launch_timeout`
172    /// (20 seconds by default).
173    pub async fn launch(mut config: BrowserConfig) -> Result<(Self, Handler)> {
174        // Canonalize paths to reduce issues with sandboxing
175        config.executable = utils::canonicalize_except_snap(config.executable).await?;
176
177        // Launch a new chromium instance
178        let mut child = config.launch()?;
179
180        /// Faillible initialization to run once the child process is created.
181        ///
182        /// All faillible calls must be executed inside this function. This ensures that all
183        /// errors are caught and that the child process is properly cleaned-up.
184        async fn with_child(
185            config: &BrowserConfig,
186            child: &mut Child,
187        ) -> Result<(String, Connection<CdpEventMessage>)> {
188            let dur = config.launch_timeout;
189            let timeout_fut = Box::pin(tokio::time::sleep(dur));
190
191            // extract the ws:
192            let debug_ws_url = ws_url_from_output(child, timeout_fut).await?;
193            let conn = Connection::<CdpEventMessage>::connect(&debug_ws_url).await?;
194            Ok((debug_ws_url, conn))
195        }
196
197        let (debug_ws_url, conn) = match with_child(&config, &mut child).await {
198            Ok(conn) => conn,
199            Err(e) => {
200                // An initialization error occurred, clean up the process
201                if let Ok(Some(_)) = child.try_wait() {
202                    // already exited, do nothing, may happen if the browser crashed
203                } else {
204                    // the process is still alive, kill it and wait for exit (avoid zombie processes)
205                    child.kill().await.expect("`Browser::launch` failed but could not clean-up the child process (`kill`)");
206                    child.wait().await.expect("`Browser::launch` failed but could not clean-up the child process (`wait`)");
207                }
208                return Err(e);
209            }
210        };
211
212        // Only infaillible calls are allowed after this point to avoid clean-up issues with the
213        // child process.
214
215        let (tx, rx) = channel(1000);
216
217        let handler_config = HandlerConfig {
218            ignore_https_errors: config.ignore_https_errors,
219            viewport: config.viewport.clone(),
220            context_ids: Vec::new(),
221            request_timeout: config.request_timeout,
222            request_intercept: config.request_intercept,
223            cache_enabled: config.cache_enabled,
224            ignore_visuals: config.ignore_visuals,
225            ignore_stylesheets: config.ignore_stylesheets,
226            ignore_javascript: config.ignore_javascript,
227            ignore_analytics: config.ignore_analytics,
228            ignore_ads: config.ignore_ads,
229            extra_headers: config.extra_headers.clone(),
230            only_html: config.only_html,
231            service_worker_enabled: config.service_worker_enabled,
232            created_first_target: false,
233            intercept_manager: config.intercept_manager,
234        };
235
236        let fut = Handler::new(conn, rx, handler_config);
237        let browser_context = fut.default_browser_context().clone();
238
239        let browser = Self {
240            sender: tx,
241            config: Some(config),
242            child: Some(child),
243            debug_ws_url,
244            browser_context,
245        };
246
247        Ok((browser, fut))
248    }
249
250    /// Request to fetch all existing browser targets.
251    ///
252    /// By default, only targets launched after the browser connection are tracked
253    /// when connecting to a existing browser instance with the devtools websocket url
254    /// This function fetches existing targets on the browser and adds them as pages internally
255    ///
256    /// The pages are not guaranteed to be ready as soon as the function returns
257    /// You should wait a few millis if you need to use a page
258    /// Returns [TargetInfo]
259    pub async fn fetch_targets(&mut self) -> Result<Vec<TargetInfo>> {
260        let (tx, rx) = oneshot_channel();
261
262        self.sender
263            .clone()
264            .send(HandlerMessage::FetchTargets(tx))
265            .await?;
266
267        rx.await?
268    }
269
270    /// Request for the browser to close completely.
271    ///
272    /// If the browser was spawned by [`Browser::launch`], it is recommended to wait for the
273    /// spawned instance exit, to avoid "zombie" processes ([`Browser::wait`],
274    /// [`Browser::wait_sync`], [`Browser::try_wait`]).
275    /// [`Browser::drop`] waits automatically if needed.
276    pub async fn close(&mut self) -> Result<CloseReturns> {
277        let (tx, rx) = oneshot_channel();
278
279        self.sender
280            .clone()
281            .send(HandlerMessage::CloseBrowser(tx))
282            .await?;
283
284        rx.await?
285    }
286
287    /// Asynchronously wait for the spawned chromium instance to exit completely.
288    ///
289    /// The instance is spawned by [`Browser::launch`]. `wait` is usually called after
290    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
291    /// "zombie" processes.
292    ///
293    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
294    /// connected to an existing browser through [`Browser::connect`])
295    pub async fn wait(&mut self) -> io::Result<Option<ExitStatus>> {
296        if let Some(child) = self.child.as_mut() {
297            Ok(Some(child.wait().await?))
298        } else {
299            Ok(None)
300        }
301    }
302
303    /// If the spawned chromium instance has completely exited, wait for it.
304    ///
305    /// The instance is spawned by [`Browser::launch`]. `try_wait` is usually called after
306    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
307    /// "zombie" processes.
308    ///
309    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
310    /// connected to an existing browser through [`Browser::connect`])
311    pub fn try_wait(&mut self) -> io::Result<Option<ExitStatus>> {
312        if let Some(child) = self.child.as_mut() {
313            child.try_wait()
314        } else {
315            Ok(None)
316        }
317    }
318
    /// Get the spawned chromium instance
    ///
    /// The instance is spawned by [`Browser::launch`]. The result is a [`async_process::Child`]
    /// value. It acts as a compat wrapper for an `async-std` or `tokio` child process.
    ///
    /// You may use [`async_process::Child::as_mut_inner`] to retrieve the concrete implementation
    /// for the selected runtime.
    ///
    /// Returns `None` if this [`Browser`] did not spawn any chromium instance (e.g.
    /// connected to an existing browser through [`Browser::connect`])
    pub fn get_mut_child(&mut self) -> Option<&mut Child> {
        self.child.as_mut()
    }
332
    /// Whether this [`Browser`] holds a spawned chromium process, i.e. it was
    /// created by [`Browser::launch`] rather than [`Browser::connect`].
    pub fn has_child(&self) -> bool {
        self.child.is_some()
    }
337
338    /// Forcibly kill the spawned chromium instance
339    ///
340    /// The instance is spawned by [`Browser::launch`]. `kill` will automatically wait for the child
341    /// process to exit to avoid "zombie" processes.
342    ///
343    /// This method is provided to help if the browser does not close by itself. You should prefer
344    /// to use [`Browser::close`].
345    ///
346    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
347    /// connected to an existing browser through [`Browser::connect`])
348    pub async fn kill(&mut self) -> Option<io::Result<()>> {
349        match self.child.as_mut() {
350            Some(child) => Some(child.kill().await),
351            None => None,
352        }
353    }
354
355    /// If not launched as incognito this creates a new incognito browser
356    /// context. After that this browser exists within the incognito session.
357    /// New pages created while being in incognito mode will also run in the
358    /// incognito context. Incognito contexts won't share cookies/cache with
359    /// other browser contexts.
360    pub async fn start_incognito_context(&mut self) -> Result<&mut Self> {
361        if !self.is_incognito_configured() {
362            let browser_context_id = self
363                .create_browser_context(CreateBrowserContextParams::default())
364                .await?;
365            self.browser_context = BrowserContext::from(browser_context_id);
366            self.sender
367                .clone()
368                .send(HandlerMessage::InsertContext(self.browser_context.clone()))
369                .await?;
370        }
371
372        Ok(self)
373    }
374
375    /// If a incognito session was created with
376    /// `Browser::start_incognito_context` this disposes this context.
377    ///
378    /// # Note This will also dispose all pages that were running within the
379    /// incognito context.
380    pub async fn quit_incognito_context(&mut self) -> Result<&mut Self> {
381        if let Some(id) = self.browser_context.take() {
382            self.dispose_browser_context(id.clone()).await?;
383            self.sender
384                .clone()
385                .send(HandlerMessage::DisposeContext(BrowserContext::from(id)))
386                .await?;
387        }
388        Ok(self)
389    }
390
391    /// Whether incognito mode was configured from the start
392    fn is_incognito_configured(&self) -> bool {
393        self.config
394            .as_ref()
395            .map(|c| c.incognito)
396            .unwrap_or_default()
397    }
398
    /// Returns the address of the websocket this browser is attached to,
    /// i.e. the debug web socket url of the chromium instance.
    pub fn websocket_address(&self) -> &String {
        &self.debug_ws_url
    }
403
404    /// Whether the BrowserContext is incognito.
405    pub fn is_incognito(&self) -> bool {
406        self.is_incognito_configured() || self.browser_context.is_incognito()
407    }
408
    /// The config of the spawned chromium instance if any.
    ///
    /// `None` when the browser was not launched by [`Browser::launch`].
    pub fn config(&self) -> Option<&BrowserConfig> {
        self.config.as_ref()
    }
413
414    /// Create a new browser page
415    pub async fn new_page(&self, params: impl Into<CreateTargetParams>) -> Result<Page> {
416        let (tx, rx) = oneshot_channel();
417        let mut params = params.into();
418
419        if let Some(id) = self.browser_context.id() {
420            if params.browser_context_id.is_none() {
421                params.browser_context_id = Some(id.clone());
422            }
423        }
424
425        let _ = self
426            .sender
427            .clone()
428            .send(HandlerMessage::CreatePage(params, tx))
429            .await;
430
431        rx.await?
432    }
433
434    /// Version information about the browser
435    pub async fn version(&self) -> Result<GetVersionReturns> {
436        Ok(self.execute(GetVersionParams::default()).await?.result)
437    }
438
439    /// Returns the user agent of the browser
440    pub async fn user_agent(&self) -> Result<String> {
441        Ok(self.version().await?.user_agent)
442    }
443
444    /// Call a browser method.
445    pub async fn execute<T: Command>(&self, cmd: T) -> Result<CommandResponse<T::Response>> {
446        let (tx, rx) = oneshot_channel();
447        let method = cmd.identifier();
448        let msg = CommandMessage::new(cmd, tx)?;
449
450        self.sender
451            .clone()
452            .send(HandlerMessage::Command(msg))
453            .await?;
454        let resp = rx.await??;
455        to_command_response::<T>(resp, method)
456    }
457
458    /// Return all of the pages of the browser
459    pub async fn pages(&self) -> Result<Vec<Page>> {
460        let (tx, rx) = oneshot_channel();
461        self.sender
462            .clone()
463            .send(HandlerMessage::GetPages(tx))
464            .await?;
465        Ok(rx.await?)
466    }
467
468    /// Return page of given target_id
469    pub async fn get_page(&self, target_id: TargetId) -> Result<Page> {
470        let (tx, rx) = oneshot_channel();
471        self.sender
472            .clone()
473            .send(HandlerMessage::GetPage(target_id, tx))
474            .await?;
475        rx.await?.ok_or(CdpError::NotFound)
476    }
477
478    /// Set listener for browser event
479    pub async fn event_listener<T: IntoEventKind>(&self) -> Result<EventStream<T>> {
480        let (tx, rx) = unbounded();
481        self.sender
482            .clone()
483            .send(HandlerMessage::AddEventListener(
484                EventListenerRequest::new::<T>(tx),
485            ))
486            .await?;
487
488        Ok(EventStream::new(rx))
489    }
490
491    /// Creates a new empty browser context.
492    pub async fn create_browser_context(
493        &mut self,
494        params: CreateBrowserContextParams,
495    ) -> Result<BrowserContextId> {
496        let response = self.execute(params).await?;
497        Ok(response.result.browser_context_id)
498    }
499
500    /// Send a new empty browser context.
501    pub async fn send_new_context(&mut self, browser_context_id: BrowserContextId) -> Result<()> {
502        self.browser_context = BrowserContext::from(browser_context_id);
503        self.sender
504            .clone()
505            .send(HandlerMessage::InsertContext(self.browser_context.clone()))
506            .await?;
507        Ok(())
508    }
509
510    /// Deletes a browser context.
511    pub async fn dispose_browser_context(
512        &self,
513        browser_context_id: impl Into<BrowserContextId>,
514    ) -> Result<()> {
515        self.execute(DisposeBrowserContextParams::new(browser_context_id))
516            .await?;
517
518        Ok(())
519    }
520
521    /// Clears cookies.
522    pub async fn clear_cookies(&self) -> Result<()> {
523        self.execute(ClearCookiesParams::default()).await?;
524        Ok(())
525    }
526
527    /// Returns all browser cookies.
528    pub async fn get_cookies(&self) -> Result<Vec<Cookie>> {
529        let mut cmd = GetCookiesParams::default();
530
531        cmd.browser_context_id = self.browser_context.id.clone();
532
533        Ok(self.execute(cmd).await?.result.cookies)
534    }
535
536    /// Sets given cookies.
537    pub async fn set_cookies(&self, mut cookies: Vec<CookieParam>) -> Result<&Self> {
538        for cookie in &mut cookies {
539            if let Some(url) = cookie.url.as_ref() {
540                crate::page::validate_cookie_url(url)?;
541            }
542        }
543
544        let mut cookies_param = SetCookiesParams::new(cookies);
545
546        cookies_param.browser_context_id = self.browser_context.id.clone();
547
548        self.execute(cookies_param).await?;
549        Ok(self)
550    }
551}
552
553impl Drop for Browser {
554    fn drop(&mut self) {
555        if let Some(child) = self.child.as_mut() {
556            if let Ok(Some(_)) = child.try_wait() {
557                // Already exited, do nothing. Usually occurs after using the method close or kill.
558            } else {
559                // We set the `kill_on_drop` property for the child process, so no need to explicitely
560                // kill it here. It can't really be done anyway since the method is async.
561                //
562                // On Unix, the process will be reaped in the background by the runtime automatically
563                // so it won't leave any resources locked. It is, however, a better practice for the user to
564                // do it himself since the runtime doesn't provide garantees as to when the reap occurs, so we
565                // warn him here.
566                tracing::warn!("Browser was not closed manually, it will be killed automatically in the background");
567            }
568        }
569    }
570}
571
/// Resolve devtools WebSocket URL from the provided browser process
///
/// The browser's stderr is read line by line until a line contains
/// `listening on ` followed by a `ws…` address that contains `devtools/browser`;
/// that address is returned with surrounding whitespace trimmed.
///
/// If an error occurs, it returns the browser's stderr output read so far.
///
/// The URL resolution fails if:
/// - [`CdpError::LaunchTimeout`]: `timeout_fut` completes, this corresponds to a timeout
/// - [`CdpError::LaunchExit`]: the browser process exits (or is killed)
/// - [`CdpError::LaunchIo`]: an input/output error occurs when awaiting the process exit or reading
///   the browser's stderr: end of stream, invalid UTF-8, other
///
/// # Panics
///
/// Panics if the stderr handle of `child_process` is not available.
async fn ws_url_from_output(
    child_process: &mut Child,
    timeout_fut: impl Future<Output = ()> + Unpin,
) -> Result<String> {
    use futures::{AsyncBufReadExt, FutureExt};
    let mut timeout_fut = timeout_fut.fuse();
    let stderr = child_process.stderr.take().expect("no stderror");
    // Accumulates every byte read from stderr; it is attached to any error returned below.
    let mut stderr_bytes = Vec::<u8>::new();
    let mut exit_status_fut = Box::pin(child_process.wait()).fuse();
    let mut buf = futures::io::BufReader::new(stderr);
    loop {
        // Race the timeout, the process exit and the next stderr line.
        select! {
            _ = timeout_fut => return Err(CdpError::LaunchTimeout(BrowserStderr::new(stderr_bytes))),
            exit_status = exit_status_fut => {
                // The process ending before a URL was found is always an error.
                return Err(match exit_status {
                    Err(e) => CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)),
                    Ok(exit_status) => CdpError::LaunchExit(exit_status, BrowserStderr::new(stderr_bytes)),
                })
            },
            read_res = buf.read_until(b'\n', &mut stderr_bytes).fuse() => {
                match read_res {
                    Err(e) => return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes))),
                    Ok(byte_count) => {
                        // Zero bytes read means stderr was closed without announcing a URL.
                        if byte_count == 0 {
                            let e = io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected end of stream");
                            return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
                        }
                        // Only inspect the bytes appended by this read, i.e. the latest line.
                        let start_offset = stderr_bytes.len() - byte_count;
                        let new_bytes = &stderr_bytes[start_offset..];
                        match std::str::from_utf8(new_bytes) {
                            Err(_) => {
                                let e = io::Error::new(io::ErrorKind::InvalidData, "stream did not contain valid UTF-8");
                                return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
                            }
                            Ok(line) => {
                                // Take whatever follows the last "listening on " in the line.
                                if let Some((_, ws)) = line.rsplit_once("listening on ") {
                                    if ws.starts_with("ws") && ws.contains("devtools/browser") {
                                        return Ok(ws.trim().to_string());
                                    }
                                }
                                // Any other line is kept in `stderr_bytes` and the loop continues.
                            }
                        }
                    }
                }
            }
        }
    }
}
629
/// Whether, and in which flavour, the browser runs headless.
///
/// The default is [`HeadlessMode::True`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum HeadlessMode {
    /// The "headful" mode.
    False,
    /// The old headless mode.
    #[default]
    True,
    /// The new headless mode. See also: https://developer.chrome.com/docs/chromium/new-headless
    New,
}
640
/// Configuration of a chromium instance spawned by [`Browser::launch`].
///
/// Use [`BrowserConfig::builder`] to create one.
#[derive(Debug, Clone)]
pub struct BrowserConfig {
    /// Determines whether to run headless version of the browser. Defaults to
    /// true.
    headless: HeadlessMode,
    /// Determines whether to run the browser with a sandbox.
    sandbox: bool,
    /// Launch the browser with a specific window width and height.
    window_size: Option<(u32, u32)>,
    /// Launch the browser with a specific debugging port.
    port: u16,
    /// Path for Chrome or Chromium.
    ///
    /// If unspecified, the crate will try to automatically detect a suitable
    /// binary.
    executable: std::path::PathBuf,

    /// A list of Chrome extensions to load.
    ///
    /// An extension should be a path to a folder containing the extension code.
    /// CRX files cannot be used directly and must be first extracted.
    ///
    /// Note that Chrome does not support loading extensions in headless-mode.
    /// See https://bugs.chromium.org/p/chromium/issues/detail?id=706008#c5
    extensions: Vec<String>,

    /// Environment variables to set for the Chromium process.
    /// Passes value through to std::process::Command::envs.
    pub process_envs: Option<HashMap<String, String>>,

    /// Data dir for user data
    pub user_data_dir: Option<PathBuf>,

    /// Whether to launch the `Browser` in incognito mode
    incognito: bool,

    /// Timeout duration for `Browser::launch`.
    launch_timeout: Duration,

    /// Ignore https errors, default is true
    ignore_https_errors: bool,
    /// Optional viewport, forwarded to the handler configuration on launch.
    viewport: Option<Viewport>,
    /// The duration after a request with no response should time out
    request_timeout: Duration,

    /// Additional command line arguments to pass to the browser instance.
    args: Vec<String>,

    /// Whether to disable DEFAULT_ARGS or not, default is false
    disable_default_args: bool,

    /// Whether to enable request interception
    pub request_intercept: bool,

    /// Whether to enable cache.
    pub cache_enabled: bool,
    /// Whether to enable/disable service workers.
    pub service_worker_enabled: bool,

    /// Whether to ignore visuals when request interception is enabled.
    pub ignore_visuals: bool,
    /// Whether to ignore stylesheets when request interception is enabled.
    pub ignore_stylesheets: bool,
    /// Whether to ignore javascript when request interception is enabled. This will allow framework JS like react to go through.
    pub ignore_javascript: bool,
    /// Whether to ignore analytics when request interception is enabled.
    pub ignore_analytics: bool,
    /// Whether to ignore ads when request interception is enabled.
    pub ignore_ads: bool,
    /// Extra headers.
    pub extra_headers: Option<std::collections::HashMap<String, String>>,
    /// Only html
    pub only_html: bool,
    /// The network intercept manager, forwarded to the handler configuration on launch.
    pub intercept_manager: NetworkInterceptManager,
}
717
/// Builder for a [`BrowserConfig`], created through [`BrowserConfig::builder`].
///
/// The fields mirror those of [`BrowserConfig`]; see `Default` for the
/// initial values.
#[derive(Debug, Clone)]
pub struct BrowserConfigBuilder {
    headless: HeadlessMode,
    sandbox: bool,
    window_size: Option<(u32, u32)>,
    port: u16,
    // Explicit executable path; `None` until `chrome_executable` is called.
    executable: Option<PathBuf>,
    // Options for detecting the executable, set through `chrome_detection`.
    executation_detection: DetectionOptions,
    extensions: Vec<String>,
    process_envs: Option<HashMap<String, String>>,
    user_data_dir: Option<PathBuf>,
    incognito: bool,
    launch_timeout: Duration,
    ignore_https_errors: bool,
    viewport: Option<Viewport>,
    request_timeout: Duration,
    args: Vec<String>,
    disable_default_args: bool,
    request_intercept: bool,
    cache_enabled: bool,
    service_worker_enabled: bool,
    ignore_visuals: bool,
    ignore_ads: bool,
    ignore_javascript: bool,
    ignore_stylesheets: bool,
    ignore_analytics: bool,
    only_html: bool,
    extra_headers: Option<std::collections::HashMap<String, String>>,
    intercept_manager: NetworkInterceptManager,
}
748
749impl BrowserConfig {
750    pub fn builder() -> BrowserConfigBuilder {
751        BrowserConfigBuilder::default()
752    }
753
754    pub fn with_executable(path: impl AsRef<Path>) -> Self {
755        Self::builder().chrome_executable(path).build().unwrap()
756    }
757}
758
759impl Default for BrowserConfigBuilder {
760    fn default() -> Self {
761        Self {
762            headless: HeadlessMode::True,
763            sandbox: true,
764            window_size: None,
765            port: 0,
766            executable: None,
767            executation_detection: DetectionOptions::default(),
768            extensions: Vec::new(),
769            process_envs: None,
770            user_data_dir: None,
771            incognito: false,
772            launch_timeout: Duration::from_millis(LAUNCH_TIMEOUT),
773            ignore_https_errors: true,
774            viewport: Some(Default::default()),
775            request_timeout: Duration::from_millis(REQUEST_TIMEOUT),
776            args: Vec::new(),
777            disable_default_args: false,
778            request_intercept: false,
779            cache_enabled: true,
780            ignore_visuals: false,
781            ignore_ads: false,
782            ignore_javascript: false,
783            ignore_analytics: false,
784            ignore_stylesheets: false,
785            only_html: false,
786            extra_headers: Default::default(),
787            service_worker_enabled: true,
788            intercept_manager: NetworkInterceptManager::UNKNOWN,
789        }
790    }
791}
792
793impl BrowserConfigBuilder {
794    pub fn window_size(mut self, width: u32, height: u32) -> Self {
795        self.window_size = Some((width, height));
796        self
797    }
798
799    pub fn no_sandbox(mut self) -> Self {
800        self.sandbox = false;
801        self
802    }
803
804    pub fn with_head(mut self) -> Self {
805        self.headless = HeadlessMode::False;
806        self
807    }
808
809    pub fn new_headless_mode(mut self) -> Self {
810        self.headless = HeadlessMode::New;
811        self
812    }
813
814    pub fn headless_mode(mut self, mode: HeadlessMode) -> Self {
815        self.headless = mode;
816        self
817    }
818
819    pub fn incognito(mut self) -> Self {
820        self.incognito = true;
821        self
822    }
823
824    pub fn respect_https_errors(mut self) -> Self {
825        self.ignore_https_errors = false;
826        self
827    }
828
829    pub fn port(mut self, port: u16) -> Self {
830        self.port = port;
831        self
832    }
833
834    pub fn launch_timeout(mut self, timeout: Duration) -> Self {
835        self.launch_timeout = timeout;
836        self
837    }
838
839    pub fn request_timeout(mut self, timeout: Duration) -> Self {
840        self.request_timeout = timeout;
841        self
842    }
843
844    /// Configures the viewport of the browser, which defaults to `800x600`.
845    /// `None` disables viewport emulation (i.e., it uses the browsers default
846    /// configuration, which fills the available space. This is similar to what
847    /// Playwright does when you provide `null` as the value of its `viewport`
848    /// option).
849    pub fn viewport(mut self, viewport: impl Into<Option<Viewport>>) -> Self {
850        self.viewport = viewport.into();
851        self
852    }
853
854    pub fn user_data_dir(mut self, data_dir: impl AsRef<Path>) -> Self {
855        self.user_data_dir = Some(data_dir.as_ref().to_path_buf());
856        self
857    }
858
859    pub fn chrome_executable(mut self, path: impl AsRef<Path>) -> Self {
860        self.executable = Some(path.as_ref().to_path_buf());
861        self
862    }
863
864    pub fn chrome_detection(mut self, options: DetectionOptions) -> Self {
865        self.executation_detection = options;
866        self
867    }
868
869    pub fn extension(mut self, extension: impl Into<String>) -> Self {
870        self.extensions.push(extension.into());
871        self
872    }
873
874    pub fn extensions<I, S>(mut self, extensions: I) -> Self
875    where
876        I: IntoIterator<Item = S>,
877        S: Into<String>,
878    {
879        for ext in extensions {
880            self.extensions.push(ext.into());
881        }
882        self
883    }
884
885    pub fn env(mut self, key: impl Into<String>, val: impl Into<String>) -> Self {
886        self.process_envs
887            .get_or_insert(HashMap::new())
888            .insert(key.into(), val.into());
889        self
890    }
891
892    pub fn envs<I, K, V>(mut self, envs: I) -> Self
893    where
894        I: IntoIterator<Item = (K, V)>,
895        K: Into<String>,
896        V: Into<String>,
897    {
898        self.process_envs
899            .get_or_insert(HashMap::new())
900            .extend(envs.into_iter().map(|(k, v)| (k.into(), v.into())));
901        self
902    }
903
904    pub fn arg(mut self, arg: impl Into<String>) -> Self {
905        self.args.push(arg.into());
906        self
907    }
908
909    pub fn args<I, S>(mut self, args: I) -> Self
910    where
911        I: IntoIterator<Item = S>,
912        S: Into<String>,
913    {
914        for arg in args {
915            self.args.push(arg.into());
916        }
917        self
918    }
919
920    pub fn disable_default_args(mut self) -> Self {
921        self.disable_default_args = true;
922        self
923    }
924
925    pub fn enable_request_intercept(mut self) -> Self {
926        self.request_intercept = true;
927        self
928    }
929
930    pub fn disable_request_intercept(mut self) -> Self {
931        self.request_intercept = false;
932        self
933    }
934
935    pub fn enable_cache(mut self) -> Self {
936        self.cache_enabled = true;
937        self
938    }
939
940    pub fn disable_cache(mut self) -> Self {
941        self.cache_enabled = false;
942        self
943    }
944
945    pub fn set_service_worker_enabled(mut self, bypass: bool) -> Self {
946        self.service_worker_enabled = bypass;
947        self
948    }
949
950    pub fn set_extra_headers(
951        mut self,
952        headers: Option<std::collections::HashMap<String, String>>,
953    ) -> Self {
954        self.extra_headers = headers;
955        self
956    }
957    pub fn build(self) -> std::result::Result<BrowserConfig, String> {
958        let executable = if let Some(e) = self.executable {
959            e
960        } else {
961            detection::default_executable(self.executation_detection)?
962        };
963
964        Ok(BrowserConfig {
965            headless: self.headless,
966            sandbox: self.sandbox,
967            window_size: self.window_size,
968            port: self.port,
969            executable,
970            extensions: self.extensions,
971            process_envs: self.process_envs,
972            user_data_dir: self.user_data_dir,
973            incognito: self.incognito,
974            launch_timeout: self.launch_timeout,
975            ignore_https_errors: self.ignore_https_errors,
976            viewport: self.viewport,
977            request_timeout: self.request_timeout,
978            args: self.args,
979            disable_default_args: self.disable_default_args,
980            request_intercept: self.request_intercept,
981            cache_enabled: self.cache_enabled,
982            ignore_visuals: self.ignore_visuals,
983            ignore_ads: self.ignore_ads,
984            ignore_javascript: self.ignore_javascript,
985            ignore_analytics: self.ignore_analytics,
986            ignore_stylesheets: self.ignore_stylesheets,
987            extra_headers: self.extra_headers,
988            only_html: self.only_html,
989            intercept_manager: self.intercept_manager,
990            service_worker_enabled: self.service_worker_enabled,
991        })
992    }
993}
994
995impl BrowserConfig {
996    pub fn launch(&self) -> io::Result<Child> {
997        let mut cmd = async_process::Command::new(&self.executable);
998
999        if self.disable_default_args {
1000            cmd.args(&self.args);
1001        } else {
1002            cmd.args(DEFAULT_ARGS).args(&self.args);
1003        }
1004
1005        if !self
1006            .args
1007            .iter()
1008            .any(|arg| arg.contains("--remote-debugging-port="))
1009        {
1010            cmd.arg(format!("--remote-debugging-port={}", self.port));
1011        }
1012
1013        cmd.args(
1014            self.extensions
1015                .iter()
1016                .map(|e| format!("--load-extension={e}")),
1017        );
1018
1019        if let Some(ref user_data) = self.user_data_dir {
1020            cmd.arg(format!("--user-data-dir={}", user_data.display()));
1021        } else {
1022            // If the user did not specify a data directory, this would default to the systems default
1023            // data directory. In most cases, we would rather have a fresh instance of Chromium. Specify
1024            // a temp dir just for chromiumoxide instead.
1025            cmd.arg(format!(
1026                "--user-data-dir={}",
1027                std::env::temp_dir().join("chromiumoxide-runner").display()
1028            ));
1029        }
1030
1031        if let Some((width, height)) = self.window_size {
1032            cmd.arg(format!("--window-size={width},{height}"));
1033        }
1034
1035        if !self.sandbox {
1036            cmd.args(["--no-sandbox", "--disable-setuid-sandbox"]);
1037        }
1038
1039        match self.headless {
1040            HeadlessMode::False => (),
1041            HeadlessMode::True => {
1042                cmd.args(["--headless", "--hide-scrollbars", "--mute-audio"]);
1043            }
1044            HeadlessMode::New => {
1045                cmd.args(["--headless=new", "--hide-scrollbars", "--mute-audio"]);
1046            }
1047        }
1048
1049        if self.incognito {
1050            cmd.arg("--incognito");
1051        }
1052
1053        if let Some(ref envs) = self.process_envs {
1054            cmd.envs(envs);
1055        }
1056        cmd.stderr(Stdio::piped()).spawn()
1057    }
1058}
1059
1060/// Returns the path to Chrome's executable.
1061///
1062/// If the `CHROME` environment variable is set, `default_executable` will
1063/// use it as the default path. Otherwise, the filenames `google-chrome-stable`
1064/// `chromium`, `chromium-browser`, `chrome` and `chrome-browser` are
1065/// searched for in standard places. If that fails,
1066/// `/Applications/Google Chrome.app/...` (on MacOS) or the registry (on
1067/// Windows) is consulted. If all of the above fail, an error is returned.
1068#[deprecated(note = "Use detection::default_executable instead")]
1069pub fn default_executable() -> Result<std::path::PathBuf, String> {
1070    let options = DetectionOptions {
1071        msedge: false,
1072        unstable: false,
1073    };
1074    detection::default_executable(options)
1075}
1076
/// Command-line switches handed to the Chrome binary on every launch, ahead
/// of any custom arguments, unless default arguments have been disabled on
/// the configuration.
///
/// The list is derived from Puppeteer's launcher defaults:
/// https://github.com/puppeteer/puppeteer/blob/4846b8723cf20d3551c0d755df394cc5e0c82a94/src/node/Launcher.ts#L157
static DEFAULT_ARGS: [&str; 25] = [
    "--disable-background-networking",
    "--enable-features=NetworkService,NetworkServiceInProcess",
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-breakpad",
    "--disable-client-side-phishing-detection",
    "--disable-component-extensions-with-background-pages",
    "--disable-default-apps",
    "--disable-dev-shm-usage",
    "--disable-extensions",
    "--disable-features=TranslateUI",
    "--disable-hang-monitor",
    "--disable-ipc-flooding-protection",
    "--disable-popup-blocking",
    "--disable-prompt-on-repost",
    "--disable-renderer-backgrounding",
    "--disable-sync",
    "--force-color-profile=srgb",
    "--metrics-recording-only",
    "--no-first-run",
    "--enable-automation",
    "--password-store=basic",
    "--use-mock-keychain",
    "--enable-blink-features=IdleDetection",
    "--lang=en_US",
];