1use hashbrown::HashMap;
2use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE};
3use std::future::Future;
4use std::time::Duration;
5use std::{
6 io,
7 path::{Path, PathBuf},
8};
9
10use tokio::sync::mpsc::{channel, unbounded_channel, Sender};
11use tokio::sync::oneshot::channel as oneshot_channel;
12
13use crate::async_process::{self, Child, ExitStatus, Stdio};
14use crate::cmd::{to_command_response, CommandMessage};
15use crate::conn::Connection;
16use crate::detection::{self, DetectionOptions};
17use crate::error::{BrowserStderr, CdpError, Result};
18use crate::handler::browser::BrowserContext;
19use crate::handler::viewport::Viewport;
20use crate::handler::{Handler, HandlerConfig, HandlerMessage, REQUEST_TIMEOUT};
21use crate::listeners::{EventListenerRequest, EventStream};
22use crate::page::Page;
23use crate::utils;
24use chromiumoxide_cdp::cdp::browser_protocol::browser::{
25 BrowserContextId, CloseReturns, GetVersionParams, GetVersionReturns,
26};
27use chromiumoxide_cdp::cdp::browser_protocol::browser::{
28 PermissionDescriptor, PermissionSetting, SetPermissionParams,
29};
30use chromiumoxide_cdp::cdp::browser_protocol::network::{Cookie, CookieParam};
31use chromiumoxide_cdp::cdp::browser_protocol::storage::{
32 ClearCookiesParams, GetCookiesParams, SetCookiesParams,
33};
34use chromiumoxide_cdp::cdp::browser_protocol::target::{
35 CreateBrowserContextParams, CreateTargetParams, DisposeBrowserContextParams,
36 GetBrowserContextsParams, GetBrowserContextsReturns, TargetId, TargetInfo,
37};
38
39use chromiumoxide_cdp::cdp::{CdpEventMessage, IntoEventKind};
40use chromiumoxide_types::*;
41use spider_network_blocker::intercept_manager::NetworkInterceptManager;
42
/// Default launch timeout in milliseconds.
///
/// `BrowserConfigBuilder::default` converts this value with
/// `Duration::from_millis` to initialise its `launch_timeout`.
pub const LAUNCH_TIMEOUT: u64 = 20_000;
45
46lazy_static::lazy_static! {
47 static ref REQUEST_CLIENT: reqwest::Client = reqwest::Client::builder()
49 .timeout(Duration::from_secs(60))
50 .default_headers({
51 let mut m = HeaderMap::new();
52
53 m.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
54
55 m
56 })
57 .tcp_keepalive(Some(Duration::from_secs(5)))
58 .pool_idle_timeout(Some(Duration::from_secs(60)))
59 .pool_max_idle_per_host(10)
60 .build()
61 .expect("client to build");
62}
63
/// Returns a reference to the shared, lazily initialised `REQUEST_CLIENT`.
pub fn request_client() -> &'static reqwest::Client {
    &REQUEST_CLIENT
}
69
/// Handle to a browser that is driven through a [`Handler`].
///
/// The browser itself only holds the sending half of a channel; every
/// operation is submitted as a [`HandlerMessage`] and answered by the handler
/// that is returned together with the `Browser`.
#[derive(Debug)]
pub struct Browser {
    /// Sending half of the channel used to submit messages to the handler.
    pub(crate) sender: Sender<HandlerMessage>,
    /// Configuration recorded when the browser was launched or connected.
    config: Option<BrowserConfig>,
    /// The spawned browser process; `None` when the browser was attached to
    /// via `connect`/`connect_with_config` instead of being launched.
    child: Option<Child>,
    /// WebSocket debugger URL the connection was established with.
    debug_ws_url: String,
    /// Browser context applied by default to new pages and cookie commands.
    pub browser_context: BrowserContext,
}
85
/// Payload deserialized from the browser's `/json/version` HTTP endpoint.
///
/// Each field is mapped from the JSON key named in its `serde(rename)`
/// attribute.
#[derive(serde::Deserialize, Debug, Default)]
pub struct BrowserConnection {
    /// Value of the `Browser` key.
    #[serde(rename = "Browser")]
    pub browser: String,
    /// Value of the `Protocol-Version` key.
    #[serde(rename = "Protocol-Version")]
    pub protocol_version: String,
    /// Value of the `User-Agent` key.
    #[serde(rename = "User-Agent")]
    pub user_agent: String,
    /// Value of the `V8-Version` key.
    #[serde(rename = "V8-Version")]
    pub v8_version: String,
    /// Value of the `WebKit-Version` key.
    #[serde(rename = "WebKit-Version")]
    pub webkit_version: String,
    /// Value of the `webSocketDebuggerUrl` key; this is the URL the
    /// connection is opened against.
    #[serde(rename = "webSocketDebuggerUrl")]
    pub web_socket_debugger_url: String,
}
108
109impl Browser {
110 pub async fn connect(url: impl Into<String>) -> Result<(Self, Handler)> {
114 Self::connect_with_config(url, HandlerConfig::default()).await
115 }
116
117 pub async fn connect_with_config(
121 url: impl Into<String>,
122 config: HandlerConfig,
123 ) -> Result<(Self, Handler)> {
124 let mut debug_ws_url = url.into();
125 let retries = config.connection_retries;
126
127 if debug_ws_url.starts_with("http") {
128 let version_url = if debug_ws_url.ends_with("/json/version")
129 || debug_ws_url.ends_with("/json/version/")
130 {
131 debug_ws_url.to_owned()
132 } else {
133 format!(
134 "{}{}json/version",
135 &debug_ws_url,
136 if debug_ws_url.ends_with('/') { "" } else { "/" }
137 )
138 };
139
140 let mut discovered = false;
141
142 for attempt in 0..=retries {
143 let retry = || async {
144 if attempt < retries {
145 let backoff_ms = 50u64
150 .saturating_mul(3u64.saturating_pow(attempt))
151 .min(crate::conn::MAX_BACKOFF_MS);
152 tokio::time::sleep(Duration::from_millis(backoff_ms)).await;
153 }
154 };
155
156 match REQUEST_CLIENT.get(&version_url).send().await {
157 Ok(req) => match req.bytes().await {
158 Ok(b) => {
159 match crate::serde_json::from_slice::<Box<BrowserConnection>>(&b) {
160 Ok(connection)
161 if !connection.web_socket_debugger_url.is_empty() =>
162 {
163 debug_ws_url = connection.web_socket_debugger_url;
164 discovered = true;
165 break;
166 }
167 _ => {
168 retry().await;
170 }
171 }
172 }
173 Err(_) => {
174 retry().await;
175 }
176 },
177 Err(_) => {
178 retry().await;
179 }
180 }
181 }
182
183 if !discovered {
184 return Err(CdpError::NoResponse);
185 }
186 }
187
188 let conn =
189 Connection::<CdpEventMessage>::connect_with_retries(&debug_ws_url, retries).await?;
190
191 let (tx, rx) = channel(config.channel_capacity);
192
193 let handler_config = BrowserConfig {
194 ignore_https_errors: config.ignore_https_errors,
195 viewport: config.viewport.clone(),
196 request_timeout: config.request_timeout,
197 request_intercept: config.request_intercept,
198 cache_enabled: config.cache_enabled,
199 ignore_visuals: config.ignore_visuals,
200 ignore_stylesheets: config.ignore_stylesheets,
201 ignore_javascript: config.ignore_javascript,
202 ignore_analytics: config.ignore_analytics,
203 ignore_prefetch: config.ignore_prefetch,
204 ignore_ads: config.ignore_ads,
205 extra_headers: config.extra_headers.clone(),
206 only_html: config.only_html,
207 service_worker_enabled: config.service_worker_enabled,
208 intercept_manager: config.intercept_manager,
209 max_bytes_allowed: config.max_bytes_allowed,
210 max_redirects: config.max_redirects,
211 max_main_frame_navigations: config.max_main_frame_navigations,
212 whitelist_patterns: config.whitelist_patterns.clone(),
213 blacklist_patterns: config.blacklist_patterns.clone(),
214 ..Default::default()
215 };
216
217 let fut = Handler::new(conn, rx, config);
218 let browser_context = fut.default_browser_context().clone();
219
220 let browser = Self {
221 sender: tx,
222 config: Some(handler_config),
223 child: None,
224 debug_ws_url,
225 browser_context,
226 };
227
228 Ok((browser, fut))
229 }
230
231 pub async fn launch(mut config: BrowserConfig) -> Result<(Self, Handler)> {
240 crate::bg_cleanup::init_worker();
246
247 config.executable = utils::canonicalize_except_snap(config.executable).await?;
249
250 let mut child = config.launch()?;
252
253 async fn with_child(
258 config: &BrowserConfig,
259 child: &mut Child,
260 ) -> Result<(String, Connection<CdpEventMessage>)> {
261 let dur = config.launch_timeout;
262 let timeout_fut = Box::pin(tokio::time::sleep(dur));
263
264 let debug_ws_url = ws_url_from_output(child, timeout_fut).await?;
266 let conn = Connection::<CdpEventMessage>::connect_with_retries(
267 &debug_ws_url,
268 config.connection_retries,
269 )
270 .await?;
271 Ok((debug_ws_url, conn))
272 }
273
274 let (debug_ws_url, conn) = match with_child(&config, &mut child).await {
275 Ok(conn) => conn,
276 Err(e) => {
277 if let Ok(Some(_)) = child.try_wait() {
279 } else {
281 let _ = child.kill().await;
283 let _ = child.wait().await;
284 }
285 return Err(e);
286 }
287 };
288
289 let (tx, rx) = channel(config.channel_capacity);
293
294 let handler_config = HandlerConfig {
295 ignore_https_errors: config.ignore_https_errors,
296 viewport: config.viewport.clone(),
297 context_ids: Vec::new(),
298 request_timeout: config.request_timeout,
299 request_intercept: config.request_intercept,
300 cache_enabled: config.cache_enabled,
301 ignore_visuals: config.ignore_visuals,
302 ignore_stylesheets: config.ignore_stylesheets,
303 ignore_javascript: config.ignore_javascript,
304 ignore_analytics: config.ignore_analytics,
305 ignore_prefetch: config.ignore_prefetch,
306 ignore_ads: config.ignore_ads,
307 extra_headers: config.extra_headers.clone(),
308 only_html: config.only_html,
309 service_worker_enabled: config.service_worker_enabled,
310 created_first_target: false,
311 intercept_manager: config.intercept_manager,
312 max_bytes_allowed: config.max_bytes_allowed,
313 max_redirects: config.max_redirects,
314 max_main_frame_navigations: config.max_main_frame_navigations,
315 whitelist_patterns: config.whitelist_patterns.clone(),
316 blacklist_patterns: config.blacklist_patterns.clone(),
317 #[cfg(feature = "adblock")]
318 adblock_filter_rules: config.adblock_filter_rules.clone(),
319 channel_capacity: config.channel_capacity,
320 page_channel_capacity: config.page_channel_capacity,
321 connection_retries: config.connection_retries,
322 };
323
324 let fut = Handler::new(conn, rx, handler_config);
325 let browser_context = fut.default_browser_context().clone();
326
327 let browser = Self {
328 sender: tx,
329 config: Some(config),
330 child: Some(child),
331 debug_ws_url,
332 browser_context,
333 };
334
335 Ok((browser, fut))
336 }
337
338 pub async fn fetch_targets(&mut self) -> Result<Vec<TargetInfo>> {
348 let (tx, rx) = oneshot_channel();
349
350 self.sender.send(HandlerMessage::FetchTargets(tx)).await?;
351
352 rx.await?
353 }
354
355 pub async fn close(&self) -> Result<CloseReturns> {
362 let (tx, rx) = oneshot_channel();
363
364 self.sender.send(HandlerMessage::CloseBrowser(tx)).await?;
365
366 rx.await?
367 }
368
369 pub async fn wait(&mut self) -> io::Result<Option<ExitStatus>> {
378 if let Some(child) = self.child.as_mut() {
379 Ok(Some(child.wait().await?))
380 } else {
381 Ok(None)
382 }
383 }
384
385 pub fn try_wait(&mut self) -> io::Result<Option<ExitStatus>> {
394 if let Some(child) = self.child.as_mut() {
395 child.try_wait()
396 } else {
397 Ok(None)
398 }
399 }
400
    /// Returns a mutable reference to the spawned browser process, or `None`
    /// when this browser has no child process.
    pub fn get_mut_child(&mut self) -> Option<&mut Child> {
        self.child.as_mut()
    }
414
    /// Returns `true` when this browser holds a spawned child process.
    pub fn has_child(&self) -> bool {
        self.child.is_some()
    }
419
420 pub async fn kill(&mut self) -> Option<io::Result<()>> {
431 match self.child.as_mut() {
432 Some(child) => Some(child.kill().await),
433 None => None,
434 }
435 }
436
437 pub async fn start_incognito_context(&mut self) -> Result<&mut Self> {
443 if !self.is_incognito_configured() {
444 let browser_context_id = self
445 .create_browser_context(CreateBrowserContextParams::default())
446 .await?;
447 self.browser_context = BrowserContext::from(browser_context_id);
448 self.sender
449 .send(HandlerMessage::InsertContext(self.browser_context.clone()))
450 .await?;
451 }
452
453 Ok(self)
454 }
455
456 pub async fn quit_incognito_context_base(
462 &self,
463 browser_context_id: BrowserContextId,
464 ) -> Result<&Self> {
465 self.dispose_browser_context(browser_context_id.clone())
466 .await?;
467 self.sender
468 .send(HandlerMessage::DisposeContext(BrowserContext::from(
469 browser_context_id,
470 )))
471 .await?;
472 Ok(self)
473 }
474
475 pub async fn quit_incognito_context(&mut self) -> Result<&mut Self> {
481 if let Some(id) = self.browser_context.take() {
482 let _ = self.quit_incognito_context_base(id).await;
483 }
484 Ok(self)
485 }
486
487 fn is_incognito_configured(&self) -> bool {
489 self.config
490 .as_ref()
491 .map(|c| c.incognito)
492 .unwrap_or_default()
493 }
494
    /// Returns the WebSocket debugger URL this browser is connected to.
    pub fn websocket_address(&self) -> &String {
        &self.debug_ws_url
    }
499
500 pub fn is_incognito(&self) -> bool {
502 self.is_incognito_configured() || self.browser_context.is_incognito()
503 }
504
    /// Returns the configuration stored for this browser, if any.
    pub fn config(&self) -> Option<&BrowserConfig> {
        self.config.as_ref()
    }
509
510 pub async fn new_page(&self, params: impl Into<CreateTargetParams>) -> Result<Page> {
512 let (tx, rx) = oneshot_channel();
513 let mut params = params.into();
514
515 if let Some(id) = self.browser_context.id() {
516 if params.browser_context_id.is_none() {
517 params.browser_context_id = Some(id.clone());
518 }
519 }
520
521 let _ = self
522 .sender
523 .send(HandlerMessage::CreatePage(params, tx))
524 .await;
525
526 rx.await?
527 }
528
529 pub async fn version(&self) -> Result<GetVersionReturns> {
531 Ok(self.execute(GetVersionParams::default()).await?.result)
532 }
533
534 pub async fn user_agent(&self) -> Result<String> {
536 Ok(self.version().await?.user_agent)
537 }
538
539 pub async fn execute<T: Command>(&self, cmd: T) -> Result<CommandResponse<T::Response>> {
541 let (tx, rx) = oneshot_channel();
542 let method = cmd.identifier();
543 let msg = CommandMessage::new(cmd, tx)?;
544
545 self.sender.send(HandlerMessage::Command(msg)).await?;
546 let resp = rx.await??;
547 to_command_response::<T>(resp, method)
548 }
549
550 pub async fn set_permission(
554 &self,
555 permission: PermissionDescriptor,
556 setting: PermissionSetting,
557 origin: Option<impl Into<String>>,
558 embedded_origin: Option<impl Into<String>>,
559 browser_context_id: Option<BrowserContextId>,
560 ) -> Result<&Self> {
561 self.execute(SetPermissionParams {
562 permission,
563 setting,
564 origin: origin.map(Into::into),
565 embedded_origin: embedded_origin.map(Into::into),
566 browser_context_id: browser_context_id.or_else(|| self.browser_context.id.clone()),
567 })
568 .await?;
569 Ok(self)
570 }
571
572 pub async fn set_permission_for_origin(
574 &self,
575 origin: impl Into<String>,
576 embedded_origin: Option<impl Into<String>>,
577 permission: PermissionDescriptor,
578 setting: PermissionSetting,
579 ) -> Result<&Self> {
580 self.set_permission(permission, setting, Some(origin), embedded_origin, None)
581 .await
582 }
583
584 pub async fn reset_permission_for_origin(
586 &self,
587 origin: impl Into<String>,
588 embedded_origin: Option<impl Into<String>>,
589 permission: PermissionDescriptor,
590 ) -> Result<&Self> {
591 self.set_permission_for_origin(
592 origin,
593 embedded_origin,
594 permission,
595 PermissionSetting::Prompt,
596 )
597 .await
598 }
599
600 pub async fn grant_all_permission_for_origin(
602 &self,
603 origin: impl Into<String>,
604 embedded_origin: Option<impl Into<String>>,
605 permission: PermissionDescriptor,
606 ) -> Result<&Self> {
607 self.set_permission_for_origin(
608 origin,
609 embedded_origin,
610 permission,
611 PermissionSetting::Granted,
612 )
613 .await
614 }
615
616 pub async fn deny_all_permission_for_origin(
618 &self,
619 origin: impl Into<String>,
620 embedded_origin: Option<impl Into<String>>,
621 permission: PermissionDescriptor,
622 ) -> Result<&Self> {
623 self.set_permission_for_origin(
624 origin,
625 embedded_origin,
626 permission,
627 PermissionSetting::Denied,
628 )
629 .await
630 }
631
632 pub async fn pages(&self) -> Result<Vec<Page>> {
634 let (tx, rx) = oneshot_channel();
635 self.sender.send(HandlerMessage::GetPages(tx)).await?;
636 Ok(rx.await?)
637 }
638
639 pub async fn get_page(&self, target_id: TargetId) -> Result<Page> {
641 let (tx, rx) = oneshot_channel();
642 self.sender
643 .send(HandlerMessage::GetPage(target_id, tx))
644 .await?;
645 rx.await?.ok_or(CdpError::NotFound)
646 }
647
648 pub async fn event_listener<T: IntoEventKind>(&self) -> Result<EventStream<T>> {
650 let (tx, rx) = unbounded_channel();
651 self.sender
652 .send(HandlerMessage::AddEventListener(
653 EventListenerRequest::new::<T>(tx),
654 ))
655 .await?;
656
657 Ok(EventStream::new(rx))
658 }
659
660 pub async fn create_browser_context(
662 &mut self,
663 params: CreateBrowserContextParams,
664 ) -> Result<BrowserContextId> {
665 let response = self.execute(params).await?;
666
667 Ok(response.result.browser_context_id)
668 }
669
670 pub async fn get_browser_contexts(
672 &mut self,
673 params: GetBrowserContextsParams,
674 ) -> Result<GetBrowserContextsReturns> {
675 let response = self.execute(params).await?;
676 Ok(response.result)
677 }
678
679 pub async fn send_new_context(
681 &mut self,
682 browser_context_id: BrowserContextId,
683 ) -> Result<&Self> {
684 self.browser_context = BrowserContext::from(browser_context_id);
685 self.sender
686 .send(HandlerMessage::InsertContext(self.browser_context.clone()))
687 .await?;
688 Ok(self)
689 }
690
691 pub async fn dispose_browser_context(
693 &self,
694 browser_context_id: impl Into<BrowserContextId>,
695 ) -> Result<&Self> {
696 self.execute(DisposeBrowserContextParams::new(browser_context_id))
697 .await?;
698
699 Ok(self)
700 }
701
702 pub async fn clear_cookies(&self) -> Result<&Self> {
704 self.execute(ClearCookiesParams::default()).await?;
705 Ok(self)
706 }
707
708 pub async fn get_cookies(&self) -> Result<Vec<Cookie>> {
710 let cmd = GetCookiesParams {
711 browser_context_id: self.browser_context.id.clone(),
712 };
713
714 Ok(self.execute(cmd).await?.result.cookies)
715 }
716
717 pub async fn set_cookies(&self, mut cookies: Vec<CookieParam>) -> Result<&Self> {
719 for cookie in &mut cookies {
720 if let Some(url) = cookie.url.as_ref() {
721 crate::page::validate_cookie_url(url)?;
722 }
723 }
724
725 let mut cookies_param = SetCookiesParams::new(cookies);
726
727 cookies_param.browser_context_id = self.browser_context.id.clone();
728
729 self.execute(cookies_param).await?;
730 Ok(self)
731 }
732}
733
734impl Drop for Browser {
735 fn drop(&mut self) {
736 if let Some(child) = self.child.as_mut() {
737 if let Ok(Some(_)) = child.try_wait() {
738 } else {
740 tracing::warn!("Browser was not closed manually, it will be killed automatically in the background");
748 }
749 }
750 }
751}
752
/// Reads the child's stderr line by line until it announces its WebSocket
/// debugger URL.
///
/// A line qualifies when the text after its last `"listening on "` starts
/// with `ws` and contains `devtools/browser`; the trimmed text is returned.
/// The child's stderr handle is taken out of `child_process`.
///
/// # Errors
///
/// * [`CdpError::LaunchIo`] if the child has no stderr, reading fails, the
///   stream ends, or a line is not valid UTF-8.
/// * [`CdpError::LaunchTimeout`] if `timeout_fut` completes first.
/// * [`CdpError::LaunchExit`] if the process exits before a URL is seen.
///
/// Except for the missing-stderr case, the errors carry all stderr bytes read
/// so far.
async fn ws_url_from_output(
    child_process: &mut Child,
    timeout_fut: impl Future<Output = ()> + Unpin,
) -> Result<String> {
    use tokio::io::AsyncBufReadExt;
    let stderr = match child_process.stderr.take() {
        Some(stderr) => stderr,
        None => {
            return Err(CdpError::LaunchIo(
                io::Error::new(io::ErrorKind::NotFound, "browser process has no stderr"),
                BrowserStderr::new(Vec::new()),
            ));
        }
    };
    // Accumulates everything read from stderr so it can be attached to errors.
    let mut stderr_bytes = Vec::<u8>::new();
    let mut buf = tokio::io::BufReader::new(stderr);
    let mut timeout_fut = timeout_fut;
    loop {
        tokio::select! {
            // The caller-supplied deadline elapsed.
            _ = &mut timeout_fut => return Err(CdpError::LaunchTimeout(BrowserStderr::new(stderr_bytes))),
            // The process exited (or waiting on it failed) before a URL was seen.
            exit_status = child_process.wait() => {
                return Err(match exit_status {
                    Err(e) => CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)),
                    Ok(exit_status) => CdpError::LaunchExit(exit_status, BrowserStderr::new(stderr_bytes)),
                })
            },
            // Another chunk, up to and including a newline, was appended.
            read_res = buf.read_until(b'\n', &mut stderr_bytes) => {
                match read_res {
                    Err(e) => return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes))),
                    Ok(byte_count) => {
                        // Zero bytes read means stderr was closed.
                        if byte_count == 0 {
                            let e = io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected end of stream");
                            return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
                        }
                        // Only inspect the bytes appended by this read.
                        let start_offset = stderr_bytes.len() - byte_count;
                        let new_bytes = &stderr_bytes[start_offset..];
                        match std::str::from_utf8(new_bytes) {
                            Err(_) => {
                                let e = io::Error::new(io::ErrorKind::InvalidData, "stream did not contain valid UTF-8");
                                return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
                            }
                            Ok(line) => {
                                if let Some((_, ws)) = line.rsplit_once("listening on ") {
                                    if ws.starts_with("ws") && ws.contains("devtools/browser") {
                                        return Ok(ws.trim().to_string());
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
817
/// Selects which headless flags `BrowserConfig::launch` passes to the browser.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum HeadlessMode {
    /// No headless flag is passed.
    False,
    /// Passes `--headless` (plus `--hide-scrollbars` and `--mute-audio`).
    /// This is the default.
    #[default]
    True,
    /// Passes `--headless=new` (plus `--hide-scrollbars` and `--mute-audio`).
    New,
}
828
/// Configuration for launching a browser process and for the handler that
/// drives it.
///
/// Usually created through [`BrowserConfig::builder`]. The launch-related
/// fields are turned into command-line arguments by `BrowserConfig::launch`;
/// the remaining fields are copied into the `HandlerConfig` by
/// `Browser::launch`.
#[derive(Debug, Clone, Default)]
pub struct BrowserConfig {
    /// Which headless flags to pass, if any.
    headless: HeadlessMode,
    /// When `false`, `--no-sandbox` and `--disable-setuid-sandbox` are passed.
    sandbox: bool,
    /// Optional `(width, height)` passed as `--window-size=<width>,<height>`.
    window_size: Option<(u32, u32)>,
    /// Port passed as `--remote-debugging-port` unless `args` already
    /// contains such a flag.
    port: u16,
    /// Path of the browser executable to spawn.
    executable: std::path::PathBuf,

    /// Extensions passed to the browser via `--load-extension`.
    extensions: Vec<String>,

    /// Environment variables set on the spawned process.
    pub process_envs: Option<HashMap<String, String>>,

    /// Directory passed as `--user-data-dir`; when `None`, a
    /// `chromiumoxide-runner` directory inside the system temp dir is used.
    pub user_data_dir: Option<PathBuf>,

    /// When `true`, `--incognito` is passed.
    incognito: bool,

    /// Maximum time to wait for the launched browser to print its debugger
    /// URL.
    launch_timeout: Duration,

    /// Forwarded to the handler configuration.
    ignore_https_errors: bool,
    /// Forwarded to the handler configuration.
    pub viewport: Option<Viewport>,
    /// Forwarded to the handler configuration.
    request_timeout: Duration,

    /// Additional command-line arguments, passed after the default ones.
    args: Vec<String>,

    /// When `true`, `DEFAULT_ARGS` are not passed to the browser.
    disable_default_args: bool,

    /// Forwarded to the handler configuration.
    pub request_intercept: bool,

    /// Forwarded to the handler configuration.
    pub cache_enabled: bool,
    /// Forwarded to the handler configuration.
    pub service_worker_enabled: bool,
    /// Forwarded to the handler configuration.
    pub ignore_visuals: bool,
    /// Forwarded to the handler configuration.
    pub ignore_stylesheets: bool,
    /// Forwarded to the handler configuration.
    pub ignore_javascript: bool,
    /// Forwarded to the handler configuration.
    pub ignore_analytics: bool,
    /// Forwarded to the handler configuration.
    pub ignore_prefetch: bool,
    /// Forwarded to the handler configuration.
    pub ignore_ads: bool,
    /// Forwarded to the handler configuration.
    pub extra_headers: Option<std::collections::HashMap<String, String>>,
    /// Forwarded to the handler configuration.
    pub only_html: bool,
    /// Forwarded to the handler configuration.
    pub intercept_manager: NetworkInterceptManager,
    /// Forwarded to the handler configuration.
    pub max_bytes_allowed: Option<u64>,
    /// Forwarded to the handler configuration.
    pub max_redirects: Option<usize>,
    /// Forwarded to the handler configuration.
    pub max_main_frame_navigations: Option<u32>,
    /// Forwarded to the handler configuration.
    pub whitelist_patterns: Option<Vec<String>>,
    /// Forwarded to the handler configuration.
    pub blacklist_patterns: Option<Vec<String>>,
    /// Forwarded to the handler configuration (only with the `adblock`
    /// feature).
    #[cfg(feature = "adblock")]
    pub adblock_filter_rules: Option<Vec<String>>,
    /// Capacity of the channel between the `Browser` and its handler.
    pub channel_capacity: usize,
    /// Forwarded to the handler configuration.
    // NOTE(review): presumably the capacity of per-page channels — confirm
    // against the handler implementation.
    pub page_channel_capacity: usize,
    /// Retry count passed to `Connection::connect_with_retries`.
    pub connection_retries: u32,
}
939
/// Builder for [`BrowserConfig`].
///
/// Each field mirrors the `BrowserConfig` field of the same name, except that
/// `executable` is optional here: when it is unset, `build` detects one using
/// `executation_detection`.
#[derive(Debug, Clone)]
pub struct BrowserConfigBuilder {
    /// See `BrowserConfig::headless`.
    headless: HeadlessMode,
    /// See `BrowserConfig::sandbox`.
    sandbox: bool,
    /// See `BrowserConfig::window_size`.
    window_size: Option<(u32, u32)>,
    /// See `BrowserConfig::port`.
    port: u16,
    /// Explicit executable path; `None` means "detect at build time".
    executable: Option<PathBuf>,
    /// Options handed to `detection::default_executable` when no explicit
    /// executable is set.
    executation_detection: DetectionOptions,
    /// See `BrowserConfig::extensions`.
    extensions: Vec<String>,
    /// See `BrowserConfig::process_envs`.
    process_envs: Option<HashMap<String, String>>,
    /// See `BrowserConfig::user_data_dir`.
    user_data_dir: Option<PathBuf>,
    /// See `BrowserConfig::incognito`.
    incognito: bool,
    /// See `BrowserConfig::launch_timeout`.
    launch_timeout: Duration,
    /// See `BrowserConfig::ignore_https_errors`.
    ignore_https_errors: bool,
    /// See `BrowserConfig::viewport`.
    viewport: Option<Viewport>,
    /// See `BrowserConfig::request_timeout`.
    request_timeout: Duration,
    /// See `BrowserConfig::args`.
    args: Vec<String>,
    /// See `BrowserConfig::disable_default_args`.
    disable_default_args: bool,
    /// See `BrowserConfig::request_intercept`.
    request_intercept: bool,
    /// See `BrowserConfig::cache_enabled`.
    cache_enabled: bool,
    /// See `BrowserConfig::service_worker_enabled`.
    service_worker_enabled: bool,
    /// See `BrowserConfig::ignore_visuals`.
    ignore_visuals: bool,
    /// See `BrowserConfig::ignore_ads`.
    ignore_ads: bool,
    /// See `BrowserConfig::ignore_javascript`.
    ignore_javascript: bool,
    /// See `BrowserConfig::ignore_stylesheets`.
    ignore_stylesheets: bool,
    /// See `BrowserConfig::ignore_prefetch`.
    ignore_prefetch: bool,
    /// See `BrowserConfig::ignore_analytics`.
    ignore_analytics: bool,
    /// See `BrowserConfig::only_html`.
    only_html: bool,
    /// See `BrowserConfig::extra_headers`.
    extra_headers: Option<std::collections::HashMap<String, String>>,
    /// See `BrowserConfig::intercept_manager`.
    intercept_manager: NetworkInterceptManager,
    /// See `BrowserConfig::max_bytes_allowed`.
    max_bytes_allowed: Option<u64>,
    /// See `BrowserConfig::max_redirects`.
    max_redirects: Option<usize>,
    /// See `BrowserConfig::max_main_frame_navigations`.
    max_main_frame_navigations: Option<u32>,
    /// See `BrowserConfig::whitelist_patterns`.
    whitelist_patterns: Option<Vec<String>>,
    /// See `BrowserConfig::blacklist_patterns`.
    blacklist_patterns: Option<Vec<String>>,
    /// See `BrowserConfig::adblock_filter_rules`.
    #[cfg(feature = "adblock")]
    adblock_filter_rules: Option<Vec<String>>,
    /// See `BrowserConfig::channel_capacity`.
    channel_capacity: usize,
    /// See `BrowserConfig::page_channel_capacity`.
    page_channel_capacity: usize,
    /// See `BrowserConfig::connection_retries`.
    connection_retries: u32,
}
1019
1020impl BrowserConfig {
1021 pub fn builder() -> BrowserConfigBuilder {
1023 BrowserConfigBuilder::default()
1024 }
1025
1026 pub fn with_executable(path: impl AsRef<Path>) -> Self {
1028 Self::builder().chrome_executable(path).build().unwrap()
1031 }
1032}
1033
1034impl Default for BrowserConfigBuilder {
1035 fn default() -> Self {
1036 Self {
1037 headless: HeadlessMode::True,
1038 sandbox: true,
1039 window_size: None,
1040 port: 0,
1041 executable: None,
1042 executation_detection: DetectionOptions::default(),
1043 extensions: Vec::new(),
1044 process_envs: None,
1045 user_data_dir: None,
1046 incognito: false,
1047 launch_timeout: Duration::from_millis(LAUNCH_TIMEOUT),
1048 ignore_https_errors: true,
1049 viewport: Some(Default::default()),
1050 request_timeout: Duration::from_millis(REQUEST_TIMEOUT),
1051 args: Vec::new(),
1052 disable_default_args: false,
1053 request_intercept: false,
1054 cache_enabled: true,
1055 ignore_visuals: false,
1056 ignore_ads: false,
1057 ignore_javascript: false,
1058 ignore_analytics: false,
1059 ignore_stylesheets: false,
1060 ignore_prefetch: true,
1061 only_html: false,
1062 extra_headers: Default::default(),
1063 service_worker_enabled: true,
1064 intercept_manager: NetworkInterceptManager::Unknown,
1065 max_bytes_allowed: None,
1066 max_redirects: None,
1067 max_main_frame_navigations: None,
1068 whitelist_patterns: None,
1069 blacklist_patterns: None,
1070 #[cfg(feature = "adblock")]
1071 adblock_filter_rules: None,
1072 channel_capacity: 4096,
1073 page_channel_capacity: crate::handler::page::DEFAULT_PAGE_CHANNEL_CAPACITY,
1074 connection_retries: crate::conn::DEFAULT_CONNECTION_RETRIES,
1075 }
1076 }
1077}
1078
1079impl BrowserConfigBuilder {
1080 pub fn window_size(mut self, width: u32, height: u32) -> Self {
1082 self.window_size = Some((width, height));
1083 self
1084 }
1085 pub fn no_sandbox(mut self) -> Self {
1087 self.sandbox = false;
1088 self
1089 }
1090 pub fn with_head(mut self) -> Self {
1092 self.headless = HeadlessMode::False;
1093 self
1094 }
1095 pub fn new_headless_mode(mut self) -> Self {
1097 self.headless = HeadlessMode::New;
1098 self
1099 }
1100 pub fn headless_mode(mut self, mode: HeadlessMode) -> Self {
1102 self.headless = mode;
1103 self
1104 }
1105 pub fn incognito(mut self) -> Self {
1107 self.incognito = true;
1108 self
1109 }
1110
1111 pub fn respect_https_errors(mut self) -> Self {
1112 self.ignore_https_errors = false;
1113 self
1114 }
1115
1116 pub fn port(mut self, port: u16) -> Self {
1117 self.port = port;
1118 self
1119 }
1120
1121 pub fn with_max_bytes_allowed(mut self, max_bytes_allowed: Option<u64>) -> Self {
1122 self.max_bytes_allowed = max_bytes_allowed;
1123 self
1124 }
1125
1126 pub fn with_max_redirects(mut self, max_redirects: Option<usize>) -> Self {
1132 self.max_redirects = max_redirects;
1133 self
1134 }
1135
1136 pub fn with_max_main_frame_navigations(mut self, cap: Option<u32>) -> Self {
1144 self.max_main_frame_navigations = cap;
1145 self
1146 }
1147
1148 pub fn launch_timeout(mut self, timeout: Duration) -> Self {
1149 self.launch_timeout = timeout;
1150 self
1151 }
1152
1153 pub fn request_timeout(mut self, timeout: Duration) -> Self {
1154 self.request_timeout = timeout;
1155 self
1156 }
1157
1158 pub fn viewport(mut self, viewport: impl Into<Option<Viewport>>) -> Self {
1164 self.viewport = viewport.into();
1165 self
1166 }
1167
1168 pub fn user_data_dir(mut self, data_dir: impl AsRef<Path>) -> Self {
1169 self.user_data_dir = Some(data_dir.as_ref().to_path_buf());
1170 self
1171 }
1172
1173 pub fn chrome_executable(mut self, path: impl AsRef<Path>) -> Self {
1174 self.executable = Some(path.as_ref().to_path_buf());
1175 self
1176 }
1177
1178 pub fn chrome_detection(mut self, options: DetectionOptions) -> Self {
1179 self.executation_detection = options;
1180 self
1181 }
1182
1183 pub fn extension(mut self, extension: impl Into<String>) -> Self {
1184 self.extensions.push(extension.into());
1185 self
1186 }
1187
1188 pub fn extensions<I, S>(mut self, extensions: I) -> Self
1189 where
1190 I: IntoIterator<Item = S>,
1191 S: Into<String>,
1192 {
1193 for ext in extensions {
1194 self.extensions.push(ext.into());
1195 }
1196 self
1197 }
1198
1199 pub fn env(mut self, key: impl Into<String>, val: impl Into<String>) -> Self {
1200 self.process_envs
1201 .get_or_insert(HashMap::new())
1202 .insert(key.into(), val.into());
1203 self
1204 }
1205
1206 pub fn envs<I, K, V>(mut self, envs: I) -> Self
1207 where
1208 I: IntoIterator<Item = (K, V)>,
1209 K: Into<String>,
1210 V: Into<String>,
1211 {
1212 self.process_envs
1213 .get_or_insert(HashMap::new())
1214 .extend(envs.into_iter().map(|(k, v)| (k.into(), v.into())));
1215 self
1216 }
1217
1218 pub fn arg(mut self, arg: impl Into<String>) -> Self {
1219 self.args.push(arg.into());
1220 self
1221 }
1222
1223 pub fn args<I, S>(mut self, args: I) -> Self
1224 where
1225 I: IntoIterator<Item = S>,
1226 S: Into<String>,
1227 {
1228 for arg in args {
1229 self.args.push(arg.into());
1230 }
1231 self
1232 }
1233
1234 pub fn disable_default_args(mut self) -> Self {
1235 self.disable_default_args = true;
1236 self
1237 }
1238
1239 pub fn enable_request_intercept(mut self) -> Self {
1240 self.request_intercept = true;
1241 self
1242 }
1243
1244 pub fn disable_request_intercept(mut self) -> Self {
1245 self.request_intercept = false;
1246 self
1247 }
1248
1249 pub fn enable_cache(mut self) -> Self {
1250 self.cache_enabled = true;
1251 self
1252 }
1253
1254 pub fn disable_cache(mut self) -> Self {
1255 self.cache_enabled = false;
1256 self
1257 }
1258
1259 pub fn set_service_worker_enabled(mut self, bypass: bool) -> Self {
1261 self.service_worker_enabled = bypass;
1262 self
1263 }
1264
1265 pub fn set_extra_headers(
1267 mut self,
1268 headers: Option<std::collections::HashMap<String, String>>,
1269 ) -> Self {
1270 self.extra_headers = headers;
1271 self
1272 }
1273
1274 pub fn set_whitelist_patterns(mut self, whitelist_patterns: Option<Vec<String>>) -> Self {
1276 self.whitelist_patterns = whitelist_patterns;
1277 self
1278 }
1279
1280 pub fn set_blacklist_patterns(mut self, blacklist_patterns: Option<Vec<String>>) -> Self {
1282 self.blacklist_patterns = blacklist_patterns;
1283 self
1284 }
1285
1286 #[cfg(feature = "adblock")]
1289 pub fn set_adblock_filter_rules(mut self, rules: Vec<String>) -> Self {
1290 self.adblock_filter_rules = Some(rules);
1291 self
1292 }
1293
1294 pub fn channel_capacity(mut self, capacity: usize) -> Self {
1297 self.channel_capacity = capacity;
1298 self
1299 }
1300
1301 pub fn page_channel_capacity(mut self, capacity: usize) -> Self {
1310 self.page_channel_capacity = capacity;
1311 self
1312 }
1313
1314 pub fn connection_retries(mut self, retries: u32) -> Self {
1317 self.connection_retries = retries;
1318 self
1319 }
1320
1321 pub fn build(self) -> std::result::Result<BrowserConfig, String> {
1323 let executable = if let Some(e) = self.executable {
1324 e
1325 } else {
1326 detection::default_executable(self.executation_detection)?
1327 };
1328
1329 Ok(BrowserConfig {
1330 headless: self.headless,
1331 sandbox: self.sandbox,
1332 window_size: self.window_size,
1333 port: self.port,
1334 executable,
1335 extensions: self.extensions,
1336 process_envs: self.process_envs,
1337 user_data_dir: self.user_data_dir,
1338 incognito: self.incognito,
1339 launch_timeout: self.launch_timeout,
1340 ignore_https_errors: self.ignore_https_errors,
1341 viewport: self.viewport,
1342 request_timeout: self.request_timeout,
1343 args: self.args,
1344 disable_default_args: self.disable_default_args,
1345 request_intercept: self.request_intercept,
1346 cache_enabled: self.cache_enabled,
1347 ignore_visuals: self.ignore_visuals,
1348 ignore_ads: self.ignore_ads,
1349 ignore_javascript: self.ignore_javascript,
1350 ignore_analytics: self.ignore_analytics,
1351 ignore_stylesheets: self.ignore_stylesheets,
1352 ignore_prefetch: self.ignore_prefetch,
1353 extra_headers: self.extra_headers,
1354 only_html: self.only_html,
1355 intercept_manager: self.intercept_manager,
1356 service_worker_enabled: self.service_worker_enabled,
1357 max_bytes_allowed: self.max_bytes_allowed,
1358 max_redirects: self.max_redirects,
1359 max_main_frame_navigations: self.max_main_frame_navigations,
1360 whitelist_patterns: self.whitelist_patterns,
1361 blacklist_patterns: self.blacklist_patterns,
1362 #[cfg(feature = "adblock")]
1363 adblock_filter_rules: self.adblock_filter_rules,
1364 channel_capacity: self.channel_capacity,
1365 page_channel_capacity: self.page_channel_capacity,
1366 connection_retries: self.connection_retries,
1367 })
1368 }
1369}
1370
1371impl BrowserConfig {
1372 pub fn launch(&self) -> io::Result<Child> {
1373 let mut cmd = async_process::Command::new(&self.executable);
1374
1375 if self.disable_default_args {
1376 cmd.args(&self.args);
1377 } else {
1378 cmd.args(DEFAULT_ARGS).args(&self.args);
1379 }
1380
1381 if !self
1382 .args
1383 .iter()
1384 .any(|arg| arg.contains("--remote-debugging-port="))
1385 {
1386 cmd.arg(format!("--remote-debugging-port={}", self.port));
1387 }
1388
1389 cmd.args(
1390 self.extensions
1391 .iter()
1392 .map(|e| format!("--load-extension={e}")),
1393 );
1394
1395 if let Some(ref user_data) = self.user_data_dir {
1396 cmd.arg(format!("--user-data-dir={}", user_data.display()));
1397 } else {
1398 cmd.arg(format!(
1402 "--user-data-dir={}",
1403 std::env::temp_dir().join("chromiumoxide-runner").display()
1404 ));
1405 }
1406
1407 if let Some((width, height)) = self.window_size {
1408 cmd.arg(format!("--window-size={width},{height}"));
1409 }
1410
1411 if !self.sandbox {
1412 cmd.args(["--no-sandbox", "--disable-setuid-sandbox"]);
1413 }
1414
1415 match self.headless {
1416 HeadlessMode::False => (),
1417 HeadlessMode::True => {
1418 cmd.args(["--headless", "--hide-scrollbars", "--mute-audio"]);
1419 }
1420 HeadlessMode::New => {
1421 cmd.args(["--headless=new", "--hide-scrollbars", "--mute-audio"]);
1422 }
1423 }
1424
1425 if self.incognito {
1426 cmd.arg("--incognito");
1427 }
1428
1429 if let Some(ref envs) = self.process_envs {
1430 cmd.envs(envs);
1431 }
1432 cmd.stderr(Stdio::piped()).spawn()
1433 }
1434}
1435
1436#[deprecated(note = "Use detection::default_executable instead")]
1445pub fn default_executable() -> Result<std::path::PathBuf, String> {
1446 let options = DetectionOptions {
1447 msedge: false,
1448 unstable: false,
1449 };
1450 detection::default_executable(options)
1451}
1452
/// Command-line arguments passed to every launched browser unless
/// `disable_default_args` is set on the configuration. They are placed before
/// any user-supplied arguments.
static DEFAULT_ARGS: [&str; 26] = [
    "--disable-background-networking",
    "--enable-features=NetworkService,NetworkServiceInProcess",
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-breakpad",
    "--disable-client-side-phishing-detection",
    "--disable-component-extensions-with-background-pages",
    "--disable-default-apps",
    "--disable-dev-shm-usage",
    "--disable-extensions",
    "--disable-features=TranslateUI",
    "--disable-hang-monitor",
    "--disable-ipc-flooding-protection",
    "--disable-popup-blocking",
    "--disable-prompt-on-repost",
    "--disable-renderer-backgrounding",
    "--disable-sync",
    "--force-color-profile=srgb",
    "--metrics-recording-only",
    "--no-first-run",
    "--enable-automation",
    "--password-store=basic",
    "--use-mock-keychain",
    "--enable-blink-features=IdleDetection",
    "--lang=en_US",
    "--disable-blink-features=AutomationControlled",
];