Skip to main content

chaser_oxide/browser/
mod.rs

1use std::future::Future;
2use std::io;
3
4use futures::SinkExt;
5use futures::channel::mpsc::{Sender, channel, unbounded};
6use futures::channel::oneshot::channel as oneshot_channel;
7use futures::select;
8
9use chromiumoxide_cdp::cdp::browser_protocol::browser::{
10    BrowserContextId, CloseReturns, GetVersionParams, GetVersionReturns,
11};
12use chromiumoxide_cdp::cdp::browser_protocol::network::{Cookie, CookieParam};
13use chromiumoxide_cdp::cdp::browser_protocol::storage::{
14    ClearCookiesParams, GetCookiesParams, SetCookiesParams,
15};
16use chromiumoxide_cdp::cdp::browser_protocol::target::{
17    CreateBrowserContextParams, CreateTargetParams, DisposeBrowserContextParams, TargetId,
18    TargetInfo,
19};
20use chromiumoxide_cdp::cdp::{CdpEventMessage, IntoEventKind};
21use chromiumoxide_types::*;
22
23pub use self::config::{BrowserConfig, BrowserConfigBuilder, LAUNCH_TIMEOUT};
24use crate::async_process::{Child, ExitStatus};
25use crate::cmd::{CommandMessage, to_command_response};
26use crate::conn::Connection;
27use crate::error::{BrowserStderr, CdpError, Result};
28use crate::handler::browser::BrowserContext;
29use crate::handler::{Handler, HandlerConfig, HandlerMessage};
30use crate::listeners::{EventListenerRequest, EventStream};
31use crate::page::Page;
32use crate::utils;
33
34mod argument;
35mod config;
36
/// A [`Browser`] is created when chromiumoxide connects to a Chromium instance.
///
/// Instances are obtained either from [`Browser::connect`] /
/// [`Browser::connect_with_config`] (attach to an already running instance) or from
/// [`Browser::launch`] (spawn a new instance). Both paths return the `Browser` together
/// with the [`Handler`] that owns the receiving end of `sender`.
#[derive(Debug)]
pub struct Browser {
    /// The `Sender` to send messages to the connection handler that drives the
    /// websocket
    sender: Sender<HandlerMessage>,
    /// How the spawned chromium instance was configured, if any.
    /// `None` when the browser was attached to via `connect`/`connect_with_config`.
    config: Option<BrowserConfig>,
    /// The spawned chromium instance.
    /// `None` when the browser was attached to via `connect`/`connect_with_config`.
    child: Option<Child>,
    /// The debug web socket url of the chromium instance
    debug_ws_url: String,
    /// The browser context currently in use; it is replaced by
    /// `start_incognito_context` and emptied by `quit_incognito_context`.
    browser_context: BrowserContext,
}
52
/// Browser connection information.
///
/// Deserialized from the JSON document served by the browser's `json/version` HTTP
/// endpoint (see [`Browser::connect_with_config`]). Because the type derives `Default`,
/// a failed deserialization can be replaced by a value whose fields are all empty strings.
#[derive(serde::Deserialize, Debug, Default)]
pub struct BrowserConnection {
    /// The browser name (JSON key `Browser`)
    #[serde(rename = "Browser")]
    pub browser: String,
    /// The protocol version (JSON key `Protocol-Version`)
    #[serde(rename = "Protocol-Version")]
    pub protocol_version: String,
    /// User Agent used by default (JSON key `User-Agent`)
    #[serde(rename = "User-Agent")]
    pub user_agent: String,
    /// The v8 engine version (JSON key `V8-Version`)
    #[serde(rename = "V8-Version")]
    pub v8_version: String,
    /// Webkit version (JSON key `WebKit-Version`)
    #[serde(rename = "WebKit-Version")]
    pub webkit_version: String,
    /// Remote debugging address (JSON key `webSocketDebuggerUrl`)
    #[serde(rename = "webSocketDebuggerUrl")]
    pub web_socket_debugger_url: String,
}
75
76impl Browser {
    /// Connect to an already running chromium instance via the given URL.
    ///
    /// If the URL is a http(s) URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
    ///
    /// This is a shorthand for [`Browser::connect_with_config`] called with
    /// `HandlerConfig::default()`.
    pub async fn connect(url: impl Into<String>) -> Result<(Self, Handler)> {
        Self::connect_with_config(url, HandlerConfig::default()).await
    }
83
84    // Connect to an already running chromium instance with a given `HandlerConfig`.
85    ///
86    /// If the URL is a http URL, it will first attempt to retrieve the Websocket URL from the `json/version` endpoint.
87    pub async fn connect_with_config(
88        url: impl Into<String>,
89        config: HandlerConfig,
90    ) -> Result<(Self, Handler)> {
91        let mut debug_ws_url = url.into();
92
93        if debug_ws_url.starts_with("http") {
94            match reqwest::Client::new()
95                .get(
96                    if debug_ws_url.ends_with("/json/version")
97                        || debug_ws_url.ends_with("/json/version/")
98                    {
99                        debug_ws_url.clone()
100                    } else {
101                        format!(
102                            "{}{}json/version",
103                            &debug_ws_url,
104                            if debug_ws_url.ends_with('/') { "" } else { "/" }
105                        )
106                    },
107                )
108                .header("content-type", "application/json")
109                .send()
110                .await
111            {
112                Ok(req) => {
113                    let socketaddr = req.remote_addr().unwrap();
114                    let connection: BrowserConnection =
115                        serde_json::from_slice(&req.bytes().await.unwrap_or_default())
116                            .unwrap_or_default();
117
118                    if !connection.web_socket_debugger_url.is_empty() {
119                        // prevent proxy interfaces from returning local ips to connect to the exact machine
120                        debug_ws_url = connection
121                            .web_socket_debugger_url
122                            .replace("127.0.0.1", &socketaddr.ip().to_string());
123                    }
124                }
125                Err(_) => return Err(CdpError::NoResponse),
126            }
127        }
128
129        let conn = Connection::<CdpEventMessage>::connect(&debug_ws_url).await?;
130
131        let (tx, rx) = channel(1);
132
133        let fut = Handler::new(conn, rx, config);
134        let browser_context = fut.default_browser_context().clone();
135
136        let browser = Self {
137            sender: tx,
138            config: None,
139            child: None,
140            debug_ws_url,
141            browser_context,
142        };
143        Ok((browser, fut))
144    }
145
146    /// Launches a new instance of `chromium` in the background and attaches to
147    /// its debug web socket.
148    ///
149    /// This fails when no chromium executable could be detected.
150    ///
151    /// This fails if no web socket url could be detected from the child
152    /// processes stderr for more than the configured `launch_timeout`
153    /// (20 seconds by default).
154    pub async fn launch(mut config: BrowserConfig) -> Result<(Self, Handler)> {
155        // Canonalize paths to reduce issues with sandboxing
156        config.executable = utils::canonicalize_except_snap(config.executable).await?;
157
158        // Launch a new chromium instance
159        let mut child = config.launch()?;
160
161        /// Faillible initialization to run once the child process is created.
162        ///
163        /// All faillible calls must be executed inside this function. This ensures that all
164        /// errors are caught and that the child process is properly cleaned-up.
165        async fn with_child(
166            config: &BrowserConfig,
167            child: &mut Child,
168        ) -> Result<(String, Connection<CdpEventMessage>)> {
169            let dur = config.launch_timeout;
170            let timeout_fut = Box::pin(tokio::time::sleep(dur));
171
172            // extract the ws:
173            let debug_ws_url = ws_url_from_output(child, timeout_fut).await?;
174            let conn = Connection::<CdpEventMessage>::connect(&debug_ws_url).await?;
175            Ok((debug_ws_url, conn))
176        }
177
178        let (debug_ws_url, conn) = match with_child(&config, &mut child).await {
179            Ok(conn) => conn,
180            Err(e) => {
181                // An initialization error occurred, clean up the process
182                if let Ok(Some(_)) = child.try_wait() {
183                    // already exited, do nothing, may happen if the browser crashed
184                } else {
185                    // the process is still alive, kill it and wait for exit (avoid zombie processes)
186                    child.kill().await.expect("`Browser::launch` failed but could not clean-up the child process (`kill`)");
187                    child.wait().await.expect("`Browser::launch` failed but could not clean-up the child process (`wait`)");
188                }
189                return Err(e);
190            }
191        };
192
193        // Only infaillible calls are allowed after this point to avoid clean-up issues with the
194        // child process.
195
196        let (tx, rx) = channel(1);
197
198        let handler_config = HandlerConfig {
199            ignore_https_errors: config.ignore_https_errors,
200            ignore_invalid_messages: config.ignore_invalid_messages,
201            viewport: config.viewport.clone(),
202            context_ids: Vec::new(),
203            request_timeout: config.request_timeout,
204            request_intercept: config.request_intercept,
205            cache_enabled: config.cache_enabled,
206        };
207
208        let fut = Handler::new(conn, rx, handler_config);
209        let browser_context = fut.default_browser_context().clone();
210
211        let browser = Self {
212            sender: tx,
213            config: Some(config),
214            child: Some(child),
215            debug_ws_url,
216            browser_context,
217        };
218
219        Ok((browser, fut))
220    }
221
222    /// Request to fetch all existing browser targets.
223    ///
224    /// By default, only targets launched after the browser connection are tracked
225    /// when connecting to a existing browser instance with the devtools websocket url
226    /// This function fetches existing targets on the browser and adds them as pages internally
227    ///
228    /// The pages are not guaranteed to be ready as soon as the function returns
229    /// You should wait a few millis if you need to use a page
230    /// Returns [TargetInfo]
231    pub async fn fetch_targets(&mut self) -> Result<Vec<TargetInfo>> {
232        let (tx, rx) = oneshot_channel();
233
234        self.sender
235            .clone()
236            .send(HandlerMessage::FetchTargets(tx))
237            .await?;
238
239        rx.await?
240    }
241
242    /// Request for the browser to close completely.
243    ///
244    /// If the browser was spawned by [`Browser::launch`], it is recommended to wait for the
245    /// spawned instance exit, to avoid "zombie" processes ([`Browser::wait`],
246    /// [`Browser::wait_sync`], [`Browser::try_wait`]).
247    /// [`Browser::drop`] waits automatically if needed.
248    pub async fn close(&mut self) -> Result<CloseReturns> {
249        let (tx, rx) = oneshot_channel();
250
251        self.sender
252            .clone()
253            .send(HandlerMessage::CloseBrowser(tx))
254            .await?;
255
256        rx.await?
257    }
258
259    /// Asynchronously wait for the spawned chromium instance to exit completely.
260    ///
261    /// The instance is spawned by [`Browser::launch`]. `wait` is usually called after
262    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
263    /// "zombie" processes.
264    ///
265    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
266    /// connected to an existing browser through [`Browser::connect`])
267    pub async fn wait(&mut self) -> io::Result<Option<ExitStatus>> {
268        if let Some(child) = self.child.as_mut() {
269            Ok(Some(child.wait().await?))
270        } else {
271            Ok(None)
272        }
273    }
274
275    /// If the spawned chromium instance has completely exited, wait for it.
276    ///
277    /// The instance is spawned by [`Browser::launch`]. `try_wait` is usually called after
278    /// [`Browser::close`]. You can call this explicitly to collect the process and avoid
279    /// "zombie" processes.
280    ///
281    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
282    /// connected to an existing browser through [`Browser::connect`])
283    pub fn try_wait(&mut self) -> io::Result<Option<ExitStatus>> {
284        if let Some(child) = self.child.as_mut() {
285            child.try_wait()
286        } else {
287            Ok(None)
288        }
289    }
290
    /// Get the spawned chromium instance
    ///
    /// The instance is spawned by [`Browser::launch`]. The result is a [`async_process::Child`]
    /// value.
    ///
    /// You may use [`async_process::Child::as_mut_inner`] to retrieve the concrete implementation
    /// for the selected runtime.
    ///
    /// Returns `None` if this [`Browser`] did not spawn any chromium instance (e.g.
    /// connected to an existing browser through [`Browser::connect`])
    pub fn get_mut_child(&mut self) -> Option<&mut Child> {
        self.child.as_mut()
    }
304
305    /// Forcibly kill the spawned chromium instance
306    ///
307    /// The instance is spawned by [`Browser::launch`]. `kill` will automatically wait for the child
308    /// process to exit to avoid "zombie" processes.
309    ///
310    /// This method is provided to help if the browser does not close by itself. You should prefer
311    /// to use [`Browser::close`].
312    ///
313    /// This call has no effect if this [`Browser`] did not spawn any chromium instance (e.g.
314    /// connected to an existing browser through [`Browser::connect`])
315    pub async fn kill(&mut self) -> Option<io::Result<()>> {
316        match self.child.as_mut() {
317            Some(child) => Some(child.kill().await),
318            None => None,
319        }
320    }
321
322    /// If not launched as incognito this creates a new incognito browser
323    /// context. After that this browser exists within the incognito session.
324    /// New pages created while being in incognito mode will also run in the
325    /// incognito context. Incognito contexts won't share cookies/cache with
326    /// other browser contexts.
327    pub async fn start_incognito_context(&mut self) -> Result<&mut Self> {
328        if !self.is_incognito_configured() {
329            let browser_context_id = self
330                .create_browser_context(CreateBrowserContextParams::default())
331                .await?;
332            self.browser_context = BrowserContext::from(browser_context_id);
333            self.sender
334                .clone()
335                .send(HandlerMessage::InsertContext(self.browser_context.clone()))
336                .await?;
337        }
338
339        Ok(self)
340    }
341
342    /// If a incognito session was created with
343    /// `Browser::start_incognito_context` this disposes this context.
344    ///
345    /// # Note This will also dispose all pages that were running within the
346    /// incognito context.
347    pub async fn quit_incognito_context(&mut self) -> Result<&mut Self> {
348        if let Some(id) = self.browser_context.take() {
349            self.dispose_browser_context(id.clone()).await?;
350            self.sender
351                .clone()
352                .send(HandlerMessage::DisposeContext(BrowserContext::from(id)))
353                .await?;
354        }
355        Ok(self)
356    }
357
358    /// Whether incognito mode was configured from the start
359    fn is_incognito_configured(&self) -> bool {
360        self.config
361            .as_ref()
362            .map(|c| c.incognito)
363            .unwrap_or_default()
364    }
365
    /// Returns the address of the websocket this browser is attached to
    ///
    /// This is the URL that was used to open the connection in `connect_with_config`
    /// or `launch`.
    pub fn websocket_address(&self) -> &String {
        &self.debug_ws_url
    }
370
    /// Whether the BrowserContext is incognito.
    ///
    /// True when incognito was requested in the launch configuration, or when the
    /// current browser context reports itself as incognito.
    pub fn is_incognito(&self) -> bool {
        self.is_incognito_configured() || self.browser_context.is_incognito()
    }
375
    /// The config of the spawned chromium instance if any.
    ///
    /// `None` when this browser was not created through [`Browser::launch`].
    pub fn config(&self) -> Option<&BrowserConfig> {
        self.config.as_ref()
    }
380
381    /// Create a new browser page
382    pub async fn new_page(&self, params: impl Into<CreateTargetParams>) -> Result<Page> {
383        let (tx, rx) = oneshot_channel();
384        let mut params = params.into();
385        if let Some(id) = self.browser_context.id() {
386            if params.browser_context_id.is_none() {
387                params.browser_context_id = Some(id.clone());
388            }
389        }
390
391        self.sender
392            .clone()
393            .send(HandlerMessage::CreatePage(params, tx))
394            .await?;
395
396        rx.await?
397    }
398
399    /// Version information about the browser
400    pub async fn version(&self) -> Result<GetVersionReturns> {
401        Ok(self.execute(GetVersionParams::default()).await?.result)
402    }
403
404    /// Returns the user agent of the browser
405    pub async fn user_agent(&self) -> Result<String> {
406        Ok(self.version().await?.user_agent)
407    }
408
409    /// Call a browser method.
410    pub async fn execute<T: Command>(&self, cmd: T) -> Result<CommandResponse<T::Response>> {
411        let (tx, rx) = oneshot_channel();
412        let method = cmd.identifier();
413        let msg = CommandMessage::new(cmd, tx)?;
414
415        self.sender
416            .clone()
417            .send(HandlerMessage::Command(msg))
418            .await?;
419        let resp = rx.await??;
420        to_command_response::<T>(resp, method)
421    }
422
423    /// Return all of the pages of the browser
424    pub async fn pages(&self) -> Result<Vec<Page>> {
425        let (tx, rx) = oneshot_channel();
426        self.sender
427            .clone()
428            .send(HandlerMessage::GetPages(tx))
429            .await?;
430        Ok(rx.await?)
431    }
432
433    /// Return page of given target_id
434    pub async fn get_page(&self, target_id: TargetId) -> Result<Page> {
435        let (tx, rx) = oneshot_channel();
436        self.sender
437            .clone()
438            .send(HandlerMessage::GetPage(target_id, tx))
439            .await?;
440        rx.await?.ok_or(CdpError::NotFound)
441    }
442
443    /// Set listener for browser event
444    pub async fn event_listener<T: IntoEventKind>(&self) -> Result<EventStream<T>> {
445        let (tx, rx) = unbounded();
446        self.sender
447            .clone()
448            .send(HandlerMessage::AddEventListener(
449                EventListenerRequest::new::<T>(tx),
450            ))
451            .await?;
452
453        Ok(EventStream::new(rx))
454    }
455
456    /// Creates a new empty browser context.
457    pub async fn create_browser_context(
458        &self,
459        params: CreateBrowserContextParams,
460    ) -> Result<BrowserContextId> {
461        let response = self.execute(params).await?;
462        Ok(response.result.browser_context_id)
463    }
464
465    /// Deletes a browser context.
466    pub async fn dispose_browser_context(
467        &self,
468        browser_context_id: impl Into<BrowserContextId>,
469    ) -> Result<()> {
470        self.execute(DisposeBrowserContextParams::new(browser_context_id))
471            .await?;
472
473        Ok(())
474    }
475
476    /// Creates a new incognito browser context with a specified proxy.
477    ///
478    /// The proxy should be in the format `scheme://host:port` (e.g., `http://10.10.1.1:8080`).
479    /// Note: Authentication via `user:pass@host:port` in `proxy_server` string is generally
480    /// NOT supported by Chrome directly for contexts. You may need to handle auth challenges separately.
481    pub async fn create_incognito_context_with_proxy(
482        &self,
483        proxy_server: impl Into<String>,
484    ) -> Result<BrowserContextId> {
485        let params = CreateBrowserContextParams::builder()
486            .proxy_server(proxy_server)
487            .build();
488        self.create_browser_context(params).await
489    }
490
491    /// Clears cookies.
492    pub async fn clear_cookies(&self) -> Result<()> {
493        self.execute(ClearCookiesParams::default()).await?;
494        Ok(())
495    }
496
497    /// Returns all browser cookies.
498    pub async fn get_cookies(&self) -> Result<Vec<Cookie>> {
499        Ok(self
500            .execute(GetCookiesParams::default())
501            .await?
502            .result
503            .cookies)
504    }
505
506    /// Sets given cookies.
507    pub async fn set_cookies(&self, mut cookies: Vec<CookieParam>) -> Result<&Self> {
508        for cookie in &mut cookies {
509            if let Some(url) = cookie.url.as_ref() {
510                crate::page::validate_cookie_url(url)?;
511            }
512        }
513
514        self.execute(SetCookiesParams::new(cookies)).await?;
515        Ok(self)
516    }
517}
518
519impl Drop for Browser {
520    fn drop(&mut self) {
521        if let Some(child) = self.child.as_mut() {
522            if let Ok(Some(_)) = child.try_wait() {
523                // Already exited, do nothing. Usually occurs after using the method close or kill.
524            } else {
525                // We set the `kill_on_drop` property for the child process, so no need to explicitely
526                // kill it here. It can't really be done anyway since the method is async.
527                //
528                // On Unix, the process will be reaped in the background by the runtime automatically
529                // so it won't leave any resources locked. It is, however, a better practice for the user to
530                // do it himself since the runtime doesn't provide garantees as to when the reap occurs, so we
531                // warn him here.
532                tracing::warn!(
533                    "Browser was not closed manually, it will be killed automatically in the background"
534                );
535            }
536        }
537    }
538}
539
540/// Resolve devtools WebSocket URL from the provided browser process
541///
542/// If an error occurs, it returns the browser's stderr output.
543///
544/// The URL resolution fails if:
545/// - [`CdpError::LaunchTimeout`]: `timeout_fut` completes, this corresponds to a timeout
546/// - [`CdpError::LaunchExit`]: the browser process exits (or is killed)
547/// - [`CdpError::LaunchIo`]: an input/output error occurs when await the process exit or reading
548///   the browser's stderr: end of stream, invalid UTF-8, other
549async fn ws_url_from_output(
550    child_process: &mut Child,
551    timeout_fut: impl Future<Output = ()> + Unpin,
552) -> Result<String> {
553    use futures::{AsyncBufReadExt, FutureExt};
554    let mut timeout_fut = timeout_fut.fuse();
555    let stderr = child_process.stderr.take().expect("no stderror");
556    let mut stderr_bytes = Vec::<u8>::new();
557    let mut exit_status_fut = Box::pin(child_process.wait()).fuse();
558    let mut buf = futures::io::BufReader::new(stderr);
559    loop {
560        select! {
561            _ = timeout_fut => return Err(CdpError::LaunchTimeout(BrowserStderr::new(stderr_bytes))),
562            exit_status = exit_status_fut => {
563                return Err(match exit_status {
564                    Err(e) => CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)),
565                    Ok(exit_status) => CdpError::LaunchExit(exit_status, BrowserStderr::new(stderr_bytes)),
566                })
567            },
568            read_res = buf.read_until(b'\n', &mut stderr_bytes).fuse() => {
569                match read_res {
570                    Err(e) => return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes))),
571                    Ok(byte_count) => {
572                        if byte_count == 0 {
573                            let e = io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected end of stream");
574                            return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
575                        }
576                        let start_offset = stderr_bytes.len() - byte_count;
577                        let new_bytes = &stderr_bytes[start_offset..];
578                        match std::str::from_utf8(new_bytes) {
579                            Err(_) => {
580                                let e = io::Error::new(io::ErrorKind::InvalidData, "stream did not contain valid UTF-8");
581                                return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
582                            }
583                            Ok(line) => {
584                                if let Some((_, ws)) = line.rsplit_once("listening on ") {
585                                    if ws.starts_with("ws") && ws.contains("devtools/browser") {
586                                        return Ok(ws.trim().to_string());
587                                    }
588                                }
589                            }
590                        }
591                    }
592                }
593            }
594        }
595    }
596}