Skip to main content

chaser_oxide/browser/
mod.rs

1use std::future::Future;
2use std::io;
3
4use futures::channel::mpsc::{channel, unbounded, Sender};
5use futures::channel::oneshot::channel as oneshot_channel;
6use futures::select;
7use futures::SinkExt;
8
9use chromiumoxide_cdp::cdp::browser_protocol::browser::{
10    BrowserContextId, CloseReturns, GetVersionParams, GetVersionReturns,
11};
12use chromiumoxide_cdp::cdp::browser_protocol::network::{Cookie, CookieParam};
13use chromiumoxide_cdp::cdp::browser_protocol::storage::{
14    ClearCookiesParams, GetCookiesParams, SetCookiesParams,
15};
16use chromiumoxide_cdp::cdp::browser_protocol::target::{
17    CreateBrowserContextParams, CreateTargetParams, DisposeBrowserContextParams, TargetId,
18    TargetInfo,
19};
20use chromiumoxide_cdp::cdp::{CdpEventMessage, IntoEventKind};
21use chromiumoxide_types::*;
22
23pub use self::config::{BrowserConfig, BrowserConfigBuilder, LAUNCH_TIMEOUT};
24use crate::async_process::{Child, ExitStatus};
25use crate::cmd::{to_command_response, CommandMessage};
26use crate::conn::Connection;
27use crate::error::{BrowserStderr, CdpError, Result};
28use crate::handler::browser::BrowserContext;
29use crate::handler::{Handler, HandlerConfig, HandlerMessage};
30use crate::listeners::{EventListenerRequest, EventStream};
31use crate::page::Page;
32use crate::utils;
33
34mod argument;
35mod config;
36
/// A [`Browser`] is created when chromiumoxide connects to a Chromium instance.
///
/// It is obtained either by attaching to an already running instance
/// ([`Browser::connect`]) or by spawning a new one ([`Browser::launch`]). Both
/// constructors also return the [`Handler`] that receives the messages sent
/// through `sender`.
#[derive(Debug)]
pub struct Browser {
    /// The `Sender` to send messages to the connection handler that drives the
    /// websocket
    sender: Sender<HandlerMessage>,
    /// How the spawned chromium instance was configured, if any
    /// (`None` when attached through [`Browser::connect`])
    config: Option<BrowserConfig>,
    /// The spawned chromium instance
    /// (`None` when attached through [`Browser::connect`])
    child: Option<Child>,
    /// The debug web socket url of the chromium instance
    debug_ws_url: String,
    /// The browser context new pages are created in by default
    browser_context: BrowserContext,
}
52
53/// Browser connection information.
54#[derive(serde::Deserialize, Debug, Default)]
55pub struct BrowserConnection {
56    #[serde(rename = "Browser")]
57    /// The browser name
58    pub browser: String,
59    #[serde(rename = "Protocol-Version")]
60    /// Browser version
61    pub protocol_version: String,
62    #[serde(rename = "User-Agent")]
63    /// User Agent used by default.
64    pub user_agent: String,
65    #[serde(rename = "V8-Version")]
66    /// The v8 engine version
67    pub v8_version: String,
68    #[serde(rename = "WebKit-Version")]
69    /// Webkit version
70    pub webkit_version: String,
71    #[serde(rename = "webSocketDebuggerUrl")]
72    /// Remote debugging address
73    pub web_socket_debugger_url: String,
74}
75
76impl Browser {
77    /// Connect to an already running chromium instance via the given URL.
78    ///
79    /// If the URL is a http(s) URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
80    pub async fn connect(url: impl Into<String>) -> Result<(Self, Handler)> {
81        Self::connect_with_config(url, HandlerConfig::default()).await
82    }
83
84    // Connect to an already running chromium instance with a given `HandlerConfig`.
85    ///
86    /// If the URL is a http URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
87    pub async fn connect_with_config(
88        url: impl Into<String>,
89        config: HandlerConfig,
90    ) -> Result<(Self, Handler)> {
91        let mut debug_ws_url = url.into();
92
93        if debug_ws_url.starts_with("http") {
94            match reqwest::Client::new()
95                .get(
96                    if debug_ws_url.ends_with("/json/version")
97                        || debug_ws_url.ends_with("/json/version/")
98                    {
99                        debug_ws_url.clone()
100                    } else {
101                        format!(
102                            "{}{}json/version",
103                            &debug_ws_url,
104                            if debug_ws_url.ends_with('/') { "" } else { "/" }
105                        )
106                    },
107                )
108                .header("content-type", "application/json")
109                .send()
110                .await
111            {
112                Ok(req) => {
113                    let socketaddr = req.remote_addr().unwrap();
114                    let connection: BrowserConnection =
115                        serde_json::from_slice(&req.bytes().await.unwrap_or_default())
116                            .unwrap_or_default();
117
118                    if !connection.web_socket_debugger_url.is_empty() {
119                        // prevent proxy interfaces from returning local ips to connect to the exact machine
120                        debug_ws_url = connection
121                            .web_socket_debugger_url
122                            .replace("127.0.0.1", &socketaddr.ip().to_string());
123                    }
124                }
125                Err(_) => return Err(CdpError::NoResponse),
126            }
127        }
128
129        let conn = Connection::<CdpEventMessage>::connect(&debug_ws_url).await?;
130
131        let (tx, rx) = channel(1);
132
133        let fut = Handler::new(conn, rx, config);
134        let browser_context = fut.default_browser_context().clone();
135
136        let browser = Self {
137            sender: tx,
138            config: None,
139            child: None,
140            debug_ws_url,
141            browser_context,
142        };
143        Ok((browser, fut))
144    }
145
146    /// Launches a new instance of `chromium` in the background and attaches to
147    /// its debug web socket.
148    ///
149    /// This fails when no chromium executable could be detected.
150    ///
151    /// This fails if no web socket url could be detected from the child
152    /// processes stderr for more than the configured `launch_timeout`
153    /// (20 seconds by default).
154    pub async fn launch(mut config: BrowserConfig) -> Result<(Self, Handler)> {
155        // Canonalize paths to reduce issues with sandboxing
156        config.executable = utils::canonicalize_except_snap(config.executable).await?;
157
158        // Launch a new chromium instance
159        let mut child = config.launch()?;
160
161        /// Faillible initialization to run once the child process is created.
162        ///
163        /// All faillible calls must be executed inside this function. This ensures that all
164        /// errors are caught and that the child process is properly cleaned-up.
165        async fn with_child(
166            config: &BrowserConfig,
167            child: &mut Child,
168        ) -> Result<(String, Connection<CdpEventMessage>)> {
169            let dur = config.launch_timeout;
170            cfg_if::cfg_if! {
171                if #[cfg(feature = "async-std-runtime")] {
172                    let timeout_fut = Box::pin(async_std::task::sleep(dur));
173                } else if #[cfg(feature = "tokio-runtime")] {
174                    let timeout_fut = Box::pin(tokio::time::sleep(dur));
175                } else {
176                    panic!("missing chromiumoxide runtime: enable `async-std-runtime` or `tokio-runtime`")
177                }
178            };
179            // extract the ws:
180            let debug_ws_url = ws_url_from_output(child, timeout_fut).await?;
181            let conn = Connection::<CdpEventMessage>::connect(&debug_ws_url).await?;
182            Ok((debug_ws_url, conn))
183        }
184
185        let (debug_ws_url, conn) = match with_child(&config, &mut child).await {
186            Ok(conn) => conn,
187            Err(e) => {
188                // An initialization error occurred, clean up the process
189                if let Ok(Some(_)) = child.try_wait() {
190                    // already exited, do nothing, may happen if the browser crashed
191                } else {
192                    // the process is still alive, kill it and wait for exit (avoid zombie processes)
193                    child.kill().await.expect("`Browser::launch` failed but could not clean-up the child process (`kill`)");
194                    child.wait().await.expect("`Browser::launch` failed but could not clean-up the child process (`wait`)");
195                }
196                return Err(e);
197            }
198        };
199
200        // Only infaillible calls are allowed after this point to avoid clean-up issues with the
201        // child process.
202
203        let (tx, rx) = channel(1);
204
205        let handler_config = HandlerConfig {
206            ignore_https_errors: config.ignore_https_errors,
207            ignore_invalid_messages: config.ignore_invalid_messages,
208            viewport: config.viewport.clone(),
209            context_ids: Vec::new(),
210            request_timeout: config.request_timeout,
211            request_intercept: config.request_intercept,
212            cache_enabled: config.cache_enabled,
213        };
214
215        let fut = Handler::new(conn, rx, handler_config);
216        let browser_context = fut.default_browser_context().clone();
217
218        let browser = Self {
219            sender: tx,
220            config: Some(config),
221            child: Some(child),
222            debug_ws_url,
223            browser_context,
224        };
225
226        Ok((browser, fut))
227    }
228
229    /// Request to fetch all existing browser targets.
230    ///
231    /// By default, only targets launched after the browser connection are tracked
232    /// when connecting to a existing browser instance with the devtools websocket url
233    /// This function fetches existing targets on the browser and adds them as pages internally
234    ///
235    /// The pages are not guaranteed to be ready as soon as the function returns
236    /// You should wait a few millis if you need to use a page
237    /// Returns [TargetInfo]
238    pub async fn fetch_targets(&mut self) -> Result<Vec<TargetInfo>> {
239        let (tx, rx) = oneshot_channel();
240
241        self.sender
242            .clone()
243            .send(HandlerMessage::FetchTargets(tx))
244            .await?;
245
246        rx.await?
247    }
248
249    /// Request for the browser to close completely.
250    ///
251    /// If the browser was spawned by [`Browser::launch`], it is recommended to wait for the
252    /// spawned instance exit, to avoid "zombie" processes ([`Browser::wait`],
253    /// [`Browser::wait_sync`], [`Browser::try_wait`]).
254    /// [`Browser::drop`] waits automatically if needed.
255    pub async fn close(&mut self) -> Result<CloseReturns> {
256        let (tx, rx) = oneshot_channel();
257
258        self.sender
259            .clone()
260            .send(HandlerMessage::CloseBrowser(tx))
261            .await?;
262
263        rx.await?
264    }
265
266    /// Asynchronously wait for the spawned chromium instance to exit completely.
267    ///
268    /// The instance is spawned by [`Browser::launch`]. `wait` is usually called after
269    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
270    /// "zombie" processes.
271    ///
272    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
273    /// connected to an existing browser through [`Browser::connect`])
274    pub async fn wait(&mut self) -> io::Result<Option<ExitStatus>> {
275        if let Some(child) = self.child.as_mut() {
276            Ok(Some(child.wait().await?))
277        } else {
278            Ok(None)
279        }
280    }
281
282    /// If the spawned chromium instance has completely exited, wait for it.
283    ///
284    /// The instance is spawned by [`Browser::launch`]. `try_wait` is usually called after
285    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
286    /// "zombie" processes.
287    ///
288    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
289    /// connected to an existing browser through [`Browser::connect`])
290    pub fn try_wait(&mut self) -> io::Result<Option<ExitStatus>> {
291        if let Some(child) = self.child.as_mut() {
292            child.try_wait()
293        } else {
294            Ok(None)
295        }
296    }
297
298    /// Get the spawned chromium instance
299    ///
300    /// The instance is spawned by [`Browser::launch`]. The result is a [`async_process::Child`]
301    /// value. It acts as a compat wrapper for an `async-std` or `tokio` child process.
302    ///
303    /// You may use [`async_process::Child::as_mut_inner`] to retrieve the concrete implementation
304    /// for the selected runtime.
305    ///
306    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
307    /// connected to an existing browser through [`Browser::connect`])
308    pub fn get_mut_child(&mut self) -> Option<&mut Child> {
309        self.child.as_mut()
310    }
311
312    /// Forcibly kill the spawned chromium instance
313    ///
314    /// The instance is spawned by [`Browser::launch`]. `kill` will automatically wait for the child
315    /// process to exit to avoid "zombie" processes.
316    ///
317    /// This method is provided to help if the browser does not close by itself. You should prefer
318    /// to use [`Browser::close`].
319    ///
320    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
321    /// connected to an existing browser through [`Browser::connect`])
322    pub async fn kill(&mut self) -> Option<io::Result<()>> {
323        match self.child.as_mut() {
324            Some(child) => Some(child.kill().await),
325            None => None,
326        }
327    }
328
329    /// If not launched as incognito this creates a new incognito browser
330    /// context. After that this browser exists within the incognito session.
331    /// New pages created while being in incognito mode will also run in the
332    /// incognito context. Incognito contexts won't share cookies/cache with
333    /// other browser contexts.
334    pub async fn start_incognito_context(&mut self) -> Result<&mut Self> {
335        if !self.is_incognito_configured() {
336            let browser_context_id = self
337                .create_browser_context(CreateBrowserContextParams::default())
338                .await?;
339            self.browser_context = BrowserContext::from(browser_context_id);
340            self.sender
341                .clone()
342                .send(HandlerMessage::InsertContext(self.browser_context.clone()))
343                .await?;
344        }
345
346        Ok(self)
347    }
348
349    /// If a incognito session was created with
350    /// `Browser::start_incognito_context` this disposes this context.
351    ///
352    /// # Note This will also dispose all pages that were running within the
353    /// incognito context.
354    pub async fn quit_incognito_context(&mut self) -> Result<&mut Self> {
355        if let Some(id) = self.browser_context.take() {
356            self.dispose_browser_context(id.clone()).await?;
357            self.sender
358                .clone()
359                .send(HandlerMessage::DisposeContext(BrowserContext::from(id)))
360                .await?;
361        }
362        Ok(self)
363    }
364
365    /// Whether incognito mode was configured from the start
366    fn is_incognito_configured(&self) -> bool {
367        self.config
368            .as_ref()
369            .map(|c| c.incognito)
370            .unwrap_or_default()
371    }
372
373    /// Returns the address of the websocket this browser is attached to
374    pub fn websocket_address(&self) -> &String {
375        &self.debug_ws_url
376    }
377
378    /// Whether the BrowserContext is incognito.
379    pub fn is_incognito(&self) -> bool {
380        self.is_incognito_configured() || self.browser_context.is_incognito()
381    }
382
383    /// The config of the spawned chromium instance if any.
384    pub fn config(&self) -> Option<&BrowserConfig> {
385        self.config.as_ref()
386    }
387
388    /// Create a new browser page
389    pub async fn new_page(&self, params: impl Into<CreateTargetParams>) -> Result<Page> {
390        let (tx, rx) = oneshot_channel();
391        let mut params = params.into();
392        if let Some(id) = self.browser_context.id() {
393            if params.browser_context_id.is_none() {
394                params.browser_context_id = Some(id.clone());
395            }
396        }
397
398        self.sender
399            .clone()
400            .send(HandlerMessage::CreatePage(params, tx))
401            .await?;
402
403        rx.await?
404    }
405
406    /// Version information about the browser
407    pub async fn version(&self) -> Result<GetVersionReturns> {
408        Ok(self.execute(GetVersionParams::default()).await?.result)
409    }
410
411    /// Returns the user agent of the browser
412    pub async fn user_agent(&self) -> Result<String> {
413        Ok(self.version().await?.user_agent)
414    }
415
416    /// Call a browser method.
417    pub async fn execute<T: Command>(&self, cmd: T) -> Result<CommandResponse<T::Response>> {
418        let (tx, rx) = oneshot_channel();
419        let method = cmd.identifier();
420        let msg = CommandMessage::new(cmd, tx)?;
421
422        self.sender
423            .clone()
424            .send(HandlerMessage::Command(msg))
425            .await?;
426        let resp = rx.await??;
427        to_command_response::<T>(resp, method)
428    }
429
430    /// Return all of the pages of the browser
431    pub async fn pages(&self) -> Result<Vec<Page>> {
432        let (tx, rx) = oneshot_channel();
433        self.sender
434            .clone()
435            .send(HandlerMessage::GetPages(tx))
436            .await?;
437        Ok(rx.await?)
438    }
439
440    /// Return page of given target_id
441    pub async fn get_page(&self, target_id: TargetId) -> Result<Page> {
442        let (tx, rx) = oneshot_channel();
443        self.sender
444            .clone()
445            .send(HandlerMessage::GetPage(target_id, tx))
446            .await?;
447        rx.await?.ok_or(CdpError::NotFound)
448    }
449
450    /// Set listener for browser event
451    pub async fn event_listener<T: IntoEventKind>(&self) -> Result<EventStream<T>> {
452        let (tx, rx) = unbounded();
453        self.sender
454            .clone()
455            .send(HandlerMessage::AddEventListener(
456                EventListenerRequest::new::<T>(tx),
457            ))
458            .await?;
459
460        Ok(EventStream::new(rx))
461    }
462
463    /// Creates a new empty browser context.
464    pub async fn create_browser_context(
465        &self,
466        params: CreateBrowserContextParams,
467    ) -> Result<BrowserContextId> {
468        let response = self.execute(params).await?;
469        Ok(response.result.browser_context_id)
470    }
471
472    /// Deletes a browser context.
473    pub async fn dispose_browser_context(
474        &self,
475        browser_context_id: impl Into<BrowserContextId>,
476    ) -> Result<()> {
477        self.execute(DisposeBrowserContextParams::new(browser_context_id))
478            .await?;
479
480        Ok(())
481    }
482
483    /// Creates a new incognito browser context with a specified proxy.
484    ///
485    /// The proxy should be in the format `scheme://host:port` (e.g., `http://10.10.1.1:8080`).
486    /// Note: Authentication via `user:pass@host:port` in `proxy_server` string is generally
487    /// NOT supported by Chrome directly for contexts. You may need to handle auth challenges separately.
488    pub async fn create_incognito_context_with_proxy(
489        &self,
490        proxy_server: impl Into<String>,
491    ) -> Result<BrowserContextId> {
492        let params = CreateBrowserContextParams::builder()
493            .proxy_server(proxy_server)
494            .build();
495        self.create_browser_context(params).await
496    }
497
498    /// Clears cookies.
499    pub async fn clear_cookies(&self) -> Result<()> {
500        self.execute(ClearCookiesParams::default()).await?;
501        Ok(())
502    }
503
504    /// Returns all browser cookies.
505    pub async fn get_cookies(&self) -> Result<Vec<Cookie>> {
506        Ok(self
507            .execute(GetCookiesParams::default())
508            .await?
509            .result
510            .cookies)
511    }
512
513    /// Sets given cookies.
514    pub async fn set_cookies(&self, mut cookies: Vec<CookieParam>) -> Result<&Self> {
515        for cookie in &mut cookies {
516            if let Some(url) = cookie.url.as_ref() {
517                crate::page::validate_cookie_url(url)?;
518            }
519        }
520
521        self.execute(SetCookiesParams::new(cookies)).await?;
522        Ok(self)
523    }
524}
525
526impl Drop for Browser {
527    fn drop(&mut self) {
528        if let Some(child) = self.child.as_mut() {
529            if let Ok(Some(_)) = child.try_wait() {
530                // Already exited, do nothing. Usually occurs after using the method close or kill.
531            } else {
532                // We set the `kill_on_drop` property for the child process, so no need to explicitely
533                // kill it here. It can't really be done anyway since the method is async.
534                //
535                // On Unix, the process will be reaped in the background by the runtime automatically
536                // so it won't leave any resources locked. It is, however, a better practice for the user to
537                // do it himself since the runtime doesn't provide garantees as to when the reap occurs, so we
538                // warn him here.
539                tracing::warn!("Browser was not closed manually, it will be killed automatically in the background");
540            }
541        }
542    }
543}
544
545/// Resolve devtools WebSocket URL from the provided browser process
546///
547/// If an error occurs, it returns the browser's stderr output.
548///
549/// The URL resolution fails if:
550/// - [`CdpError::LaunchTimeout`]: `timeout_fut` completes, this corresponds to a timeout
551/// - [`CdpError::LaunchExit`]: the browser process exits (or is killed)
552/// - [`CdpError::LaunchIo`]: an input/output error occurs when await the process exit or reading
553///   the browser's stderr: end of stream, invalid UTF-8, other
554async fn ws_url_from_output(
555    child_process: &mut Child,
556    timeout_fut: impl Future<Output = ()> + Unpin,
557) -> Result<String> {
558    use futures::{AsyncBufReadExt, FutureExt};
559    let mut timeout_fut = timeout_fut.fuse();
560    let stderr = child_process.stderr.take().expect("no stderror");
561    let mut stderr_bytes = Vec::<u8>::new();
562    let mut exit_status_fut = Box::pin(child_process.wait()).fuse();
563    let mut buf = futures::io::BufReader::new(stderr);
564    loop {
565        select! {
566            _ = timeout_fut => return Err(CdpError::LaunchTimeout(BrowserStderr::new(stderr_bytes))),
567            exit_status = exit_status_fut => {
568                return Err(match exit_status {
569                    Err(e) => CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)),
570                    Ok(exit_status) => CdpError::LaunchExit(exit_status, BrowserStderr::new(stderr_bytes)),
571                })
572            },
573            read_res = buf.read_until(b'\n', &mut stderr_bytes).fuse() => {
574                match read_res {
575                    Err(e) => return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes))),
576                    Ok(byte_count) => {
577                        if byte_count == 0 {
578                            let e = io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected end of stream");
579                            return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
580                        }
581                        let start_offset = stderr_bytes.len() - byte_count;
582                        let new_bytes = &stderr_bytes[start_offset..];
583                        match std::str::from_utf8(new_bytes) {
584                            Err(_) => {
585                                let e = io::Error::new(io::ErrorKind::InvalidData, "stream did not contain valid UTF-8");
586                                return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
587                            }
588                            Ok(line) => {
589                                if let Some((_, ws)) = line.rsplit_once("listening on ") {
590                                    if ws.starts_with("ws") && ws.contains("devtools/browser") {
591                                        return Ok(ws.trim().to_string());
592                                    }
593                                }
594                            }
595                        }
596                    }
597                }
598            }
599        }
600    }
601}