1use hashbrown::HashMap;
2use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE};
3use std::future::Future;
4use std::time::Duration;
5use std::{
6 io,
7 path::{Path, PathBuf},
8};
9
10use tokio::sync::mpsc::{channel, unbounded_channel, Sender};
11use tokio::sync::oneshot::channel as oneshot_channel;
12
13use crate::async_process::{self, Child, ExitStatus, Stdio};
14use crate::cmd::{to_command_response, CommandMessage};
15use crate::conn::Connection;
16use crate::detection::{self, DetectionOptions};
17use crate::error::{BrowserStderr, CdpError, Result};
18use crate::handler::browser::BrowserContext;
19use crate::handler::viewport::Viewport;
20use crate::handler::{Handler, HandlerConfig, HandlerMessage, REQUEST_TIMEOUT};
21use crate::listeners::{EventListenerRequest, EventStream};
22use crate::page::Page;
23use crate::utils;
24use chromiumoxide_cdp::cdp::browser_protocol::browser::{
25 BrowserContextId, CloseReturns, GetVersionParams, GetVersionReturns,
26};
27use chromiumoxide_cdp::cdp::browser_protocol::browser::{
28 PermissionDescriptor, PermissionSetting, SetPermissionParams,
29};
30use chromiumoxide_cdp::cdp::browser_protocol::network::{Cookie, CookieParam};
31use chromiumoxide_cdp::cdp::browser_protocol::storage::{
32 ClearCookiesParams, GetCookiesParams, SetCookiesParams,
33};
34use chromiumoxide_cdp::cdp::browser_protocol::target::{
35 CreateBrowserContextParams, CreateTargetParams, DisposeBrowserContextParams,
36 GetBrowserContextsParams, GetBrowserContextsReturns, TargetId, TargetInfo,
37};
38
39use chromiumoxide_cdp::cdp::{CdpEventMessage, IntoEventKind};
40use chromiumoxide_types::*;
41use spider_network_blocker::intercept_manager::NetworkInterceptManager;
42
/// Default browser launch timeout, in milliseconds.
///
/// `BrowserConfigBuilder::default` converts this with `Duration::from_millis`
/// to seed its `launch_timeout`.
pub const LAUNCH_TIMEOUT: u64 = 20_000;
45
46lazy_static::lazy_static! {
47 static ref REQUEST_CLIENT: reqwest::Client = reqwest::Client::builder()
49 .timeout(Duration::from_secs(60))
50 .default_headers({
51 let mut m = HeaderMap::new();
52
53 m.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
54
55 m
56 })
57 .tcp_keepalive(Some(Duration::from_secs(5)))
58 .pool_idle_timeout(Some(Duration::from_secs(60)))
59 .pool_max_idle_per_host(10)
60 .build()
61 .expect("client to build");
62}
63
64pub fn request_client() -> &'static reqwest::Client {
67 &REQUEST_CLIENT
68}
69
/// Handle to a browser reached over a DevTools websocket.
///
/// Requests are forwarded as `HandlerMessage`s through `sender` to the
/// `Handler` that is returned alongside the `Browser` by `connect*`/`launch`.
#[derive(Debug)]
pub struct Browser {
    /// Sending half of the channel whose receiver is given to the `Handler`.
    pub(crate) sender: Sender<HandlerMessage>,
    /// Configuration recorded when the browser was connected or launched.
    config: Option<BrowserConfig>,
    /// The spawned browser process; `None` when attached via `connect*`.
    child: Option<Child>,
    /// Websocket URL of the browser's debugging endpoint.
    debug_ws_url: String,
    /// Browser context used for new pages, cookies and permissions.
    pub browser_context: BrowserContext,
}
85
/// JSON document deserialized from the browser's `/json/version` HTTP
/// endpoint (see `Browser::connect_with_config`).
#[derive(serde::Deserialize, Debug, Default)]
pub struct BrowserConnection {
    /// Value of the `Browser` key.
    #[serde(rename = "Browser")]
    pub browser: String,
    /// Value of the `Protocol-Version` key.
    #[serde(rename = "Protocol-Version")]
    pub protocol_version: String,
    /// Value of the `User-Agent` key.
    #[serde(rename = "User-Agent")]
    pub user_agent: String,
    /// Value of the `V8-Version` key.
    #[serde(rename = "V8-Version")]
    pub v8_version: String,
    /// Value of the `WebKit-Version` key.
    #[serde(rename = "WebKit-Version")]
    pub webkit_version: String,
    /// Value of the `webSocketDebuggerUrl` key; used as the websocket URL to
    /// connect to when it is non-empty.
    #[serde(rename = "webSocketDebuggerUrl")]
    pub web_socket_debugger_url: String,
}
108
109impl Browser {
110 pub async fn connect(url: impl Into<String>) -> Result<(Self, Handler)> {
114 Self::connect_with_config(url, HandlerConfig::default()).await
115 }
116
117 pub async fn connect_with_config(
121 url: impl Into<String>,
122 config: HandlerConfig,
123 ) -> Result<(Self, Handler)> {
124 let mut debug_ws_url = url.into();
125 let retries = config.connection_retries;
126
127 if debug_ws_url.starts_with("http") {
128 let version_url = if debug_ws_url.ends_with("/json/version")
129 || debug_ws_url.ends_with("/json/version/")
130 {
131 debug_ws_url.to_owned()
132 } else {
133 format!(
134 "{}{}json/version",
135 &debug_ws_url,
136 if debug_ws_url.ends_with('/') { "" } else { "/" }
137 )
138 };
139
140 let mut discovered = false;
141
142 for attempt in 0..=retries {
143 let retry = || async {
144 if attempt < retries {
145 let backoff_ms = 50u64 * 3u64.saturating_pow(attempt);
146 tokio::time::sleep(Duration::from_millis(backoff_ms)).await;
147 }
148 };
149
150 match REQUEST_CLIENT.get(&version_url).send().await {
151 Ok(req) => match req.bytes().await {
152 Ok(b) => {
153 match crate::serde_json::from_slice::<Box<BrowserConnection>>(&b) {
154 Ok(connection)
155 if !connection.web_socket_debugger_url.is_empty() =>
156 {
157 debug_ws_url = connection.web_socket_debugger_url;
158 discovered = true;
159 break;
160 }
161 _ => {
162 retry().await;
164 }
165 }
166 }
167 Err(_) => {
168 retry().await;
169 }
170 },
171 Err(_) => {
172 retry().await;
173 }
174 }
175 }
176
177 if !discovered {
178 return Err(CdpError::NoResponse);
179 }
180 }
181
182 let conn =
183 Connection::<CdpEventMessage>::connect_with_retries(&debug_ws_url, retries).await?;
184
185 let (tx, rx) = channel(config.channel_capacity);
186
187 let handler_config = BrowserConfig {
188 ignore_https_errors: config.ignore_https_errors,
189 viewport: config.viewport.clone(),
190 request_timeout: config.request_timeout,
191 request_intercept: config.request_intercept,
192 cache_enabled: config.cache_enabled,
193 ignore_visuals: config.ignore_visuals,
194 ignore_stylesheets: config.ignore_stylesheets,
195 ignore_javascript: config.ignore_javascript,
196 ignore_analytics: config.ignore_analytics,
197 ignore_prefetch: config.ignore_prefetch,
198 ignore_ads: config.ignore_ads,
199 extra_headers: config.extra_headers.clone(),
200 only_html: config.only_html,
201 service_worker_enabled: config.service_worker_enabled,
202 intercept_manager: config.intercept_manager,
203 max_bytes_allowed: config.max_bytes_allowed,
204 max_redirects: config.max_redirects,
205 max_main_frame_navigations: config.max_main_frame_navigations,
206 whitelist_patterns: config.whitelist_patterns.clone(),
207 blacklist_patterns: config.blacklist_patterns.clone(),
208 ..Default::default()
209 };
210
211 let fut = Handler::new(conn, rx, config);
212 let browser_context = fut.default_browser_context().clone();
213
214 let browser = Self {
215 sender: tx,
216 config: Some(handler_config),
217 child: None,
218 debug_ws_url,
219 browser_context,
220 };
221
222 Ok((browser, fut))
223 }
224
225 pub async fn launch(mut config: BrowserConfig) -> Result<(Self, Handler)> {
234 crate::bg_cleanup::init_worker();
240
241 config.executable = utils::canonicalize_except_snap(config.executable).await?;
243
244 let mut child = config.launch()?;
246
247 async fn with_child(
252 config: &BrowserConfig,
253 child: &mut Child,
254 ) -> Result<(String, Connection<CdpEventMessage>)> {
255 let dur = config.launch_timeout;
256 let timeout_fut = Box::pin(tokio::time::sleep(dur));
257
258 let debug_ws_url = ws_url_from_output(child, timeout_fut).await?;
260 let conn = Connection::<CdpEventMessage>::connect_with_retries(
261 &debug_ws_url,
262 config.connection_retries,
263 )
264 .await?;
265 Ok((debug_ws_url, conn))
266 }
267
268 let (debug_ws_url, conn) = match with_child(&config, &mut child).await {
269 Ok(conn) => conn,
270 Err(e) => {
271 if let Ok(Some(_)) = child.try_wait() {
273 } else {
275 let _ = child.kill().await;
277 let _ = child.wait().await;
278 }
279 return Err(e);
280 }
281 };
282
283 let (tx, rx) = channel(config.channel_capacity);
287
288 let handler_config = HandlerConfig {
289 ignore_https_errors: config.ignore_https_errors,
290 viewport: config.viewport.clone(),
291 context_ids: Vec::new(),
292 request_timeout: config.request_timeout,
293 request_intercept: config.request_intercept,
294 cache_enabled: config.cache_enabled,
295 ignore_visuals: config.ignore_visuals,
296 ignore_stylesheets: config.ignore_stylesheets,
297 ignore_javascript: config.ignore_javascript,
298 ignore_analytics: config.ignore_analytics,
299 ignore_prefetch: config.ignore_prefetch,
300 ignore_ads: config.ignore_ads,
301 extra_headers: config.extra_headers.clone(),
302 only_html: config.only_html,
303 service_worker_enabled: config.service_worker_enabled,
304 created_first_target: false,
305 intercept_manager: config.intercept_manager,
306 max_bytes_allowed: config.max_bytes_allowed,
307 max_redirects: config.max_redirects,
308 max_main_frame_navigations: config.max_main_frame_navigations,
309 whitelist_patterns: config.whitelist_patterns.clone(),
310 blacklist_patterns: config.blacklist_patterns.clone(),
311 #[cfg(feature = "adblock")]
312 adblock_filter_rules: config.adblock_filter_rules.clone(),
313 channel_capacity: config.channel_capacity,
314 page_channel_capacity: config.page_channel_capacity,
315 connection_retries: config.connection_retries,
316 };
317
318 let fut = Handler::new(conn, rx, handler_config);
319 let browser_context = fut.default_browser_context().clone();
320
321 let browser = Self {
322 sender: tx,
323 config: Some(config),
324 child: Some(child),
325 debug_ws_url,
326 browser_context,
327 };
328
329 Ok((browser, fut))
330 }
331
332 pub async fn fetch_targets(&mut self) -> Result<Vec<TargetInfo>> {
342 let (tx, rx) = oneshot_channel();
343
344 self.sender.send(HandlerMessage::FetchTargets(tx)).await?;
345
346 rx.await?
347 }
348
349 pub async fn close(&self) -> Result<CloseReturns> {
356 let (tx, rx) = oneshot_channel();
357
358 self.sender.send(HandlerMessage::CloseBrowser(tx)).await?;
359
360 rx.await?
361 }
362
363 pub async fn wait(&mut self) -> io::Result<Option<ExitStatus>> {
372 if let Some(child) = self.child.as_mut() {
373 Ok(Some(child.wait().await?))
374 } else {
375 Ok(None)
376 }
377 }
378
379 pub fn try_wait(&mut self) -> io::Result<Option<ExitStatus>> {
388 if let Some(child) = self.child.as_mut() {
389 child.try_wait()
390 } else {
391 Ok(None)
392 }
393 }
394
    /// Returns a mutable reference to the spawned browser process, or `None`
    /// when this `Browser` has no child process.
    pub fn get_mut_child(&mut self) -> Option<&mut Child> {
        self.child.as_mut()
    }
408
    /// Returns `true` when this `Browser` holds a spawned child process.
    pub fn has_child(&self) -> bool {
        self.child.is_some()
    }
413
414 pub async fn kill(&mut self) -> Option<io::Result<()>> {
425 match self.child.as_mut() {
426 Some(child) => Some(child.kill().await),
427 None => None,
428 }
429 }
430
431 pub async fn start_incognito_context(&mut self) -> Result<&mut Self> {
437 if !self.is_incognito_configured() {
438 let browser_context_id = self
439 .create_browser_context(CreateBrowserContextParams::default())
440 .await?;
441 self.browser_context = BrowserContext::from(browser_context_id);
442 self.sender
443 .send(HandlerMessage::InsertContext(self.browser_context.clone()))
444 .await?;
445 }
446
447 Ok(self)
448 }
449
450 pub async fn quit_incognito_context_base(
456 &self,
457 browser_context_id: BrowserContextId,
458 ) -> Result<&Self> {
459 self.dispose_browser_context(browser_context_id.clone())
460 .await?;
461 self.sender
462 .send(HandlerMessage::DisposeContext(BrowserContext::from(
463 browser_context_id,
464 )))
465 .await?;
466 Ok(self)
467 }
468
469 pub async fn quit_incognito_context(&mut self) -> Result<&mut Self> {
475 if let Some(id) = self.browser_context.take() {
476 let _ = self.quit_incognito_context_base(id).await;
477 }
478 Ok(self)
479 }
480
481 fn is_incognito_configured(&self) -> bool {
483 self.config
484 .as_ref()
485 .map(|c| c.incognito)
486 .unwrap_or_default()
487 }
488
    /// Returns the websocket URL this `Browser` is connected to.
    pub fn websocket_address(&self) -> &String {
        &self.debug_ws_url
    }
493
    /// Whether the browser is incognito: either the stored configuration has
    /// `incognito` set or the current browser context reports incognito.
    pub fn is_incognito(&self) -> bool {
        self.is_incognito_configured() || self.browser_context.is_incognito()
    }
498
    /// Returns the configuration stored on this `Browser`, if any.
    pub fn config(&self) -> Option<&BrowserConfig> {
        self.config.as_ref()
    }
503
504 pub async fn new_page(&self, params: impl Into<CreateTargetParams>) -> Result<Page> {
506 let (tx, rx) = oneshot_channel();
507 let mut params = params.into();
508
509 if let Some(id) = self.browser_context.id() {
510 if params.browser_context_id.is_none() {
511 params.browser_context_id = Some(id.clone());
512 }
513 }
514
515 let _ = self
516 .sender
517 .send(HandlerMessage::CreatePage(params, tx))
518 .await;
519
520 rx.await?
521 }
522
523 pub async fn version(&self) -> Result<GetVersionReturns> {
525 Ok(self.execute(GetVersionParams::default()).await?.result)
526 }
527
528 pub async fn user_agent(&self) -> Result<String> {
530 Ok(self.version().await?.user_agent)
531 }
532
533 pub async fn execute<T: Command>(&self, cmd: T) -> Result<CommandResponse<T::Response>> {
535 let (tx, rx) = oneshot_channel();
536 let method = cmd.identifier();
537 let msg = CommandMessage::new(cmd, tx)?;
538
539 self.sender.send(HandlerMessage::Command(msg)).await?;
540 let resp = rx.await??;
541 to_command_response::<T>(resp, method)
542 }
543
544 pub async fn set_permission(
548 &self,
549 permission: PermissionDescriptor,
550 setting: PermissionSetting,
551 origin: Option<impl Into<String>>,
552 embedded_origin: Option<impl Into<String>>,
553 browser_context_id: Option<BrowserContextId>,
554 ) -> Result<&Self> {
555 self.execute(SetPermissionParams {
556 permission,
557 setting,
558 origin: origin.map(Into::into),
559 embedded_origin: embedded_origin.map(Into::into),
560 browser_context_id: browser_context_id.or_else(|| self.browser_context.id.clone()),
561 })
562 .await?;
563 Ok(self)
564 }
565
566 pub async fn set_permission_for_origin(
568 &self,
569 origin: impl Into<String>,
570 embedded_origin: Option<impl Into<String>>,
571 permission: PermissionDescriptor,
572 setting: PermissionSetting,
573 ) -> Result<&Self> {
574 self.set_permission(permission, setting, Some(origin), embedded_origin, None)
575 .await
576 }
577
578 pub async fn reset_permission_for_origin(
580 &self,
581 origin: impl Into<String>,
582 embedded_origin: Option<impl Into<String>>,
583 permission: PermissionDescriptor,
584 ) -> Result<&Self> {
585 self.set_permission_for_origin(
586 origin,
587 embedded_origin,
588 permission,
589 PermissionSetting::Prompt,
590 )
591 .await
592 }
593
594 pub async fn grant_all_permission_for_origin(
596 &self,
597 origin: impl Into<String>,
598 embedded_origin: Option<impl Into<String>>,
599 permission: PermissionDescriptor,
600 ) -> Result<&Self> {
601 self.set_permission_for_origin(
602 origin,
603 embedded_origin,
604 permission,
605 PermissionSetting::Granted,
606 )
607 .await
608 }
609
610 pub async fn deny_all_permission_for_origin(
612 &self,
613 origin: impl Into<String>,
614 embedded_origin: Option<impl Into<String>>,
615 permission: PermissionDescriptor,
616 ) -> Result<&Self> {
617 self.set_permission_for_origin(
618 origin,
619 embedded_origin,
620 permission,
621 PermissionSetting::Denied,
622 )
623 .await
624 }
625
626 pub async fn pages(&self) -> Result<Vec<Page>> {
628 let (tx, rx) = oneshot_channel();
629 self.sender.send(HandlerMessage::GetPages(tx)).await?;
630 Ok(rx.await?)
631 }
632
633 pub async fn get_page(&self, target_id: TargetId) -> Result<Page> {
635 let (tx, rx) = oneshot_channel();
636 self.sender
637 .send(HandlerMessage::GetPage(target_id, tx))
638 .await?;
639 rx.await?.ok_or(CdpError::NotFound)
640 }
641
642 pub async fn event_listener<T: IntoEventKind>(&self) -> Result<EventStream<T>> {
644 let (tx, rx) = unbounded_channel();
645 self.sender
646 .send(HandlerMessage::AddEventListener(
647 EventListenerRequest::new::<T>(tx),
648 ))
649 .await?;
650
651 Ok(EventStream::new(rx))
652 }
653
654 pub async fn create_browser_context(
656 &mut self,
657 params: CreateBrowserContextParams,
658 ) -> Result<BrowserContextId> {
659 let response = self.execute(params).await?;
660
661 Ok(response.result.browser_context_id)
662 }
663
664 pub async fn get_browser_contexts(
666 &mut self,
667 params: GetBrowserContextsParams,
668 ) -> Result<GetBrowserContextsReturns> {
669 let response = self.execute(params).await?;
670 Ok(response.result)
671 }
672
673 pub async fn send_new_context(
675 &mut self,
676 browser_context_id: BrowserContextId,
677 ) -> Result<&Self> {
678 self.browser_context = BrowserContext::from(browser_context_id);
679 self.sender
680 .send(HandlerMessage::InsertContext(self.browser_context.clone()))
681 .await?;
682 Ok(self)
683 }
684
685 pub async fn dispose_browser_context(
687 &self,
688 browser_context_id: impl Into<BrowserContextId>,
689 ) -> Result<&Self> {
690 self.execute(DisposeBrowserContextParams::new(browser_context_id))
691 .await?;
692
693 Ok(self)
694 }
695
696 pub async fn clear_cookies(&self) -> Result<&Self> {
698 self.execute(ClearCookiesParams::default()).await?;
699 Ok(self)
700 }
701
702 pub async fn get_cookies(&self) -> Result<Vec<Cookie>> {
704 let cmd = GetCookiesParams {
705 browser_context_id: self.browser_context.id.clone(),
706 };
707
708 Ok(self.execute(cmd).await?.result.cookies)
709 }
710
711 pub async fn set_cookies(&self, mut cookies: Vec<CookieParam>) -> Result<&Self> {
713 for cookie in &mut cookies {
714 if let Some(url) = cookie.url.as_ref() {
715 crate::page::validate_cookie_url(url)?;
716 }
717 }
718
719 let mut cookies_param = SetCookiesParams::new(cookies);
720
721 cookies_param.browser_context_id = self.browser_context.id.clone();
722
723 self.execute(cookies_param).await?;
724 Ok(self)
725 }
726}
727
728impl Drop for Browser {
729 fn drop(&mut self) {
730 if let Some(child) = self.child.as_mut() {
731 if let Ok(Some(_)) = child.try_wait() {
732 } else {
734 tracing::warn!("Browser was not closed manually, it will be killed automatically in the background");
742 }
743 }
744 }
745}
746
/// Extracts the DevTools websocket URL from the stderr of `child_process`.
///
/// Stderr is taken from the child and read line by line. The first line that
/// contains `listening on ` followed by text starting with `ws` and
/// containing `devtools/browser` yields the (trimmed) URL.
///
/// # Errors
///
/// * `CdpError::LaunchTimeout` when `timeout_fut` completes first.
/// * `CdpError::LaunchExit` when the process exits before a URL is seen.
/// * `CdpError::LaunchIo` when the child has no stderr, waiting or reading
///   fails, the stream ends, or a line is not valid UTF-8.
///
/// All errors produced after reading has started carry the stderr bytes
/// collected so far.
async fn ws_url_from_output(
    child_process: &mut Child,
    timeout_fut: impl Future<Output = ()> + Unpin,
) -> Result<String> {
    use tokio::io::AsyncBufReadExt;
    // `take()` moves stderr out of the child; a second call would find `None`.
    let stderr = match child_process.stderr.take() {
        Some(stderr) => stderr,
        None => {
            return Err(CdpError::LaunchIo(
                io::Error::new(io::ErrorKind::NotFound, "browser process has no stderr"),
                BrowserStderr::new(Vec::new()),
            ));
        }
    };
    // Accumulates everything read so far so it can be attached to errors.
    let mut stderr_bytes = Vec::<u8>::new();
    let mut buf = tokio::io::BufReader::new(stderr);
    let mut timeout_fut = timeout_fut;
    loop {
        tokio::select! {
            // Deadline reached before the URL showed up.
            _ = &mut timeout_fut => return Err(CdpError::LaunchTimeout(BrowserStderr::new(stderr_bytes))),
            // The process finished (or waiting failed) before the URL showed up.
            exit_status = child_process.wait() => {
                return Err(match exit_status {
                    Err(e) => CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)),
                    Ok(exit_status) => CdpError::LaunchExit(exit_status, BrowserStderr::new(stderr_bytes)),
                })
            },
            // One more line (up to and including `\n`) was appended to `stderr_bytes`.
            read_res = buf.read_until(b'\n', &mut stderr_bytes) => {
                match read_res {
                    Err(e) => return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes))),
                    Ok(byte_count) => {
                        // Zero bytes read means the stream has ended.
                        if byte_count == 0 {
                            let e = io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected end of stream");
                            return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
                        }
                        // Only inspect the bytes appended by this read.
                        let start_offset = stderr_bytes.len() - byte_count;
                        let new_bytes = &stderr_bytes[start_offset..];
                        match std::str::from_utf8(new_bytes) {
                            Err(_) => {
                                let e = io::Error::new(io::ErrorKind::InvalidData, "stream did not contain valid UTF-8");
                                return Err(CdpError::LaunchIo(e, BrowserStderr::new(stderr_bytes)));
                            }
                            Ok(line) => {
                                // Use the text after the last "listening on " in the line.
                                if let Some((_, ws)) = line.rsplit_once("listening on ") {
                                    if ws.starts_with("ws") && ws.contains("devtools/browser") {
                                        return Ok(ws.trim().to_string());
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
811
/// Selects which headless flag, if any, `BrowserConfig::launch` passes to the
/// browser.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum HeadlessMode {
    /// No headless flags are passed.
    False,
    /// Passes `--headless` (plus `--hide-scrollbars` and `--mute-audio`).
    /// This is the default variant.
    #[default]
    True,
    /// Passes `--headless=new` (plus `--hide-scrollbars` and `--mute-audio`).
    New,
}
822
/// Settings used to spawn a browser process and to configure its handler.
///
/// Usually produced by [`BrowserConfigBuilder::build`]. The process-related
/// fields are consumed by `BrowserConfig::launch`; the remaining fields are
/// copied into the `HandlerConfig` by `Browser::launch`.
#[derive(Debug, Clone, Default)]
pub struct BrowserConfig {
    /// Which headless flag to pass on the command line.
    headless: HeadlessMode,
    /// When `false`, `--no-sandbox` and `--disable-setuid-sandbox` are passed.
    sandbox: bool,
    /// Passed as `--window-size=<width>,<height>` when set.
    window_size: Option<(u32, u32)>,
    /// Passed as `--remote-debugging-port=<port>` unless `args` already
    /// contains that flag.
    port: u16,
    /// Path of the browser executable to spawn.
    executable: std::path::PathBuf,

    /// Each entry is passed as `--load-extension=<entry>`.
    extensions: Vec<String>,

    /// Environment variables set on the spawned process.
    pub process_envs: Option<HashMap<String, String>>,

    /// Passed as `--user-data-dir`; when `None`, a `chromiumoxide-runner`
    /// directory inside the system temp dir is used.
    pub user_data_dir: Option<PathBuf>,

    /// When `true`, `--incognito` is passed.
    incognito: bool,

    /// How long `Browser::launch` waits for the websocket URL to appear on
    /// the process' stderr.
    launch_timeout: Duration,

    /// Forwarded to the handler configuration.
    ignore_https_errors: bool,
    /// Forwarded to the handler configuration.
    pub viewport: Option<Viewport>,
    /// Forwarded to the handler configuration.
    request_timeout: Duration,

    /// Additional command-line arguments, passed after the default ones.
    args: Vec<String>,

    /// When `true`, `DEFAULT_ARGS` are not passed.
    disable_default_args: bool,

    /// Forwarded to the handler configuration.
    pub request_intercept: bool,

    /// Forwarded to the handler configuration.
    pub cache_enabled: bool,
    /// Forwarded to the handler configuration.
    pub service_worker_enabled: bool,
    /// Forwarded to the handler configuration.
    pub ignore_visuals: bool,
    /// Forwarded to the handler configuration.
    pub ignore_stylesheets: bool,
    /// Forwarded to the handler configuration.
    pub ignore_javascript: bool,
    /// Forwarded to the handler configuration.
    pub ignore_analytics: bool,
    /// Forwarded to the handler configuration.
    pub ignore_prefetch: bool,
    /// Forwarded to the handler configuration.
    pub ignore_ads: bool,
    /// Forwarded to the handler configuration.
    pub extra_headers: Option<std::collections::HashMap<String, String>>,
    /// Forwarded to the handler configuration.
    pub only_html: bool,
    /// Forwarded to the handler configuration.
    pub intercept_manager: NetworkInterceptManager,
    /// Forwarded to the handler configuration.
    pub max_bytes_allowed: Option<u64>,
    /// Forwarded to the handler configuration.
    pub max_redirects: Option<usize>,
    /// Forwarded to the handler configuration.
    pub max_main_frame_navigations: Option<u32>,
    /// Forwarded to the handler configuration.
    pub whitelist_patterns: Option<Vec<String>>,
    /// Forwarded to the handler configuration.
    pub blacklist_patterns: Option<Vec<String>>,
    /// Forwarded to the handler configuration (only with the `adblock`
    /// feature).
    #[cfg(feature = "adblock")]
    pub adblock_filter_rules: Option<Vec<String>>,
    /// Capacity of the channel between `Browser` and `Handler`.
    pub channel_capacity: usize,
    /// Forwarded to the handler configuration.
    pub page_channel_capacity: usize,
    /// Retry count passed to the websocket connection attempt.
    pub connection_retries: u32,
}
933
/// Builder for [`BrowserConfig`].
///
/// Each field mirrors the `BrowserConfig` field of the same name, except
/// `executable` (optional here) and `executation_detection`, which is used to
/// locate an executable when none was set.
#[derive(Debug, Clone)]
pub struct BrowserConfigBuilder {
    /// Headless mode to launch with.
    headless: HeadlessMode,
    /// Whether the sandbox stays enabled.
    sandbox: bool,
    /// Optional window size as `(width, height)`.
    window_size: Option<(u32, u32)>,
    /// Remote debugging port.
    port: u16,
    /// Explicit executable path; when `None`, detection is used in `build`.
    executable: Option<PathBuf>,
    /// Options handed to `detection::default_executable` by `build`.
    executation_detection: DetectionOptions,
    /// Extensions to load.
    extensions: Vec<String>,
    /// Environment variables for the spawned process.
    process_envs: Option<HashMap<String, String>>,
    /// User data directory.
    user_data_dir: Option<PathBuf>,
    /// Whether to launch in incognito mode.
    incognito: bool,
    /// Launch timeout.
    launch_timeout: Duration,
    /// See the `BrowserConfig` field of the same name.
    ignore_https_errors: bool,
    /// See the `BrowserConfig` field of the same name.
    viewport: Option<Viewport>,
    /// See the `BrowserConfig` field of the same name.
    request_timeout: Duration,
    /// Additional command-line arguments.
    args: Vec<String>,
    /// Whether to skip the default arguments.
    disable_default_args: bool,
    /// See the `BrowserConfig` field of the same name.
    request_intercept: bool,
    /// See the `BrowserConfig` field of the same name.
    cache_enabled: bool,
    /// See the `BrowserConfig` field of the same name.
    service_worker_enabled: bool,
    /// See the `BrowserConfig` field of the same name.
    ignore_visuals: bool,
    /// See the `BrowserConfig` field of the same name.
    ignore_ads: bool,
    /// See the `BrowserConfig` field of the same name.
    ignore_javascript: bool,
    /// See the `BrowserConfig` field of the same name.
    ignore_stylesheets: bool,
    /// See the `BrowserConfig` field of the same name.
    ignore_prefetch: bool,
    /// See the `BrowserConfig` field of the same name.
    ignore_analytics: bool,
    /// See the `BrowserConfig` field of the same name.
    only_html: bool,
    /// See the `BrowserConfig` field of the same name.
    extra_headers: Option<std::collections::HashMap<String, String>>,
    /// See the `BrowserConfig` field of the same name.
    intercept_manager: NetworkInterceptManager,
    /// See the `BrowserConfig` field of the same name.
    max_bytes_allowed: Option<u64>,
    /// See the `BrowserConfig` field of the same name.
    max_redirects: Option<usize>,
    /// See the `BrowserConfig` field of the same name.
    max_main_frame_navigations: Option<u32>,
    /// See the `BrowserConfig` field of the same name.
    whitelist_patterns: Option<Vec<String>>,
    /// See the `BrowserConfig` field of the same name.
    blacklist_patterns: Option<Vec<String>>,
    /// See the `BrowserConfig` field of the same name (only with the
    /// `adblock` feature).
    #[cfg(feature = "adblock")]
    adblock_filter_rules: Option<Vec<String>>,
    /// See the `BrowserConfig` field of the same name.
    channel_capacity: usize,
    /// See the `BrowserConfig` field of the same name.
    page_channel_capacity: usize,
    /// See the `BrowserConfig` field of the same name.
    connection_retries: u32,
}
1013
1014impl BrowserConfig {
1015 pub fn builder() -> BrowserConfigBuilder {
1017 BrowserConfigBuilder::default()
1018 }
1019
1020 pub fn with_executable(path: impl AsRef<Path>) -> Self {
1022 Self::builder().chrome_executable(path).build().unwrap()
1025 }
1026}
1027
1028impl Default for BrowserConfigBuilder {
1029 fn default() -> Self {
1030 Self {
1031 headless: HeadlessMode::True,
1032 sandbox: true,
1033 window_size: None,
1034 port: 0,
1035 executable: None,
1036 executation_detection: DetectionOptions::default(),
1037 extensions: Vec::new(),
1038 process_envs: None,
1039 user_data_dir: None,
1040 incognito: false,
1041 launch_timeout: Duration::from_millis(LAUNCH_TIMEOUT),
1042 ignore_https_errors: true,
1043 viewport: Some(Default::default()),
1044 request_timeout: Duration::from_millis(REQUEST_TIMEOUT),
1045 args: Vec::new(),
1046 disable_default_args: false,
1047 request_intercept: false,
1048 cache_enabled: true,
1049 ignore_visuals: false,
1050 ignore_ads: false,
1051 ignore_javascript: false,
1052 ignore_analytics: false,
1053 ignore_stylesheets: false,
1054 ignore_prefetch: true,
1055 only_html: false,
1056 extra_headers: Default::default(),
1057 service_worker_enabled: true,
1058 intercept_manager: NetworkInterceptManager::Unknown,
1059 max_bytes_allowed: None,
1060 max_redirects: None,
1061 max_main_frame_navigations: None,
1062 whitelist_patterns: None,
1063 blacklist_patterns: None,
1064 #[cfg(feature = "adblock")]
1065 adblock_filter_rules: None,
1066 channel_capacity: 4096,
1067 page_channel_capacity: crate::handler::page::DEFAULT_PAGE_CHANNEL_CAPACITY,
1068 connection_retries: crate::conn::DEFAULT_CONNECTION_RETRIES,
1069 }
1070 }
1071}
1072
1073impl BrowserConfigBuilder {
1074 pub fn window_size(mut self, width: u32, height: u32) -> Self {
1076 self.window_size = Some((width, height));
1077 self
1078 }
1079 pub fn no_sandbox(mut self) -> Self {
1081 self.sandbox = false;
1082 self
1083 }
1084 pub fn with_head(mut self) -> Self {
1086 self.headless = HeadlessMode::False;
1087 self
1088 }
1089 pub fn new_headless_mode(mut self) -> Self {
1091 self.headless = HeadlessMode::New;
1092 self
1093 }
1094 pub fn headless_mode(mut self, mode: HeadlessMode) -> Self {
1096 self.headless = mode;
1097 self
1098 }
1099 pub fn incognito(mut self) -> Self {
1101 self.incognito = true;
1102 self
1103 }
1104
1105 pub fn respect_https_errors(mut self) -> Self {
1106 self.ignore_https_errors = false;
1107 self
1108 }
1109
1110 pub fn port(mut self, port: u16) -> Self {
1111 self.port = port;
1112 self
1113 }
1114
1115 pub fn with_max_bytes_allowed(mut self, max_bytes_allowed: Option<u64>) -> Self {
1116 self.max_bytes_allowed = max_bytes_allowed;
1117 self
1118 }
1119
1120 pub fn with_max_redirects(mut self, max_redirects: Option<usize>) -> Self {
1126 self.max_redirects = max_redirects;
1127 self
1128 }
1129
1130 pub fn with_max_main_frame_navigations(mut self, cap: Option<u32>) -> Self {
1138 self.max_main_frame_navigations = cap;
1139 self
1140 }
1141
1142 pub fn launch_timeout(mut self, timeout: Duration) -> Self {
1143 self.launch_timeout = timeout;
1144 self
1145 }
1146
1147 pub fn request_timeout(mut self, timeout: Duration) -> Self {
1148 self.request_timeout = timeout;
1149 self
1150 }
1151
1152 pub fn viewport(mut self, viewport: impl Into<Option<Viewport>>) -> Self {
1158 self.viewport = viewport.into();
1159 self
1160 }
1161
1162 pub fn user_data_dir(mut self, data_dir: impl AsRef<Path>) -> Self {
1163 self.user_data_dir = Some(data_dir.as_ref().to_path_buf());
1164 self
1165 }
1166
1167 pub fn chrome_executable(mut self, path: impl AsRef<Path>) -> Self {
1168 self.executable = Some(path.as_ref().to_path_buf());
1169 self
1170 }
1171
1172 pub fn chrome_detection(mut self, options: DetectionOptions) -> Self {
1173 self.executation_detection = options;
1174 self
1175 }
1176
1177 pub fn extension(mut self, extension: impl Into<String>) -> Self {
1178 self.extensions.push(extension.into());
1179 self
1180 }
1181
1182 pub fn extensions<I, S>(mut self, extensions: I) -> Self
1183 where
1184 I: IntoIterator<Item = S>,
1185 S: Into<String>,
1186 {
1187 for ext in extensions {
1188 self.extensions.push(ext.into());
1189 }
1190 self
1191 }
1192
1193 pub fn env(mut self, key: impl Into<String>, val: impl Into<String>) -> Self {
1194 self.process_envs
1195 .get_or_insert(HashMap::new())
1196 .insert(key.into(), val.into());
1197 self
1198 }
1199
1200 pub fn envs<I, K, V>(mut self, envs: I) -> Self
1201 where
1202 I: IntoIterator<Item = (K, V)>,
1203 K: Into<String>,
1204 V: Into<String>,
1205 {
1206 self.process_envs
1207 .get_or_insert(HashMap::new())
1208 .extend(envs.into_iter().map(|(k, v)| (k.into(), v.into())));
1209 self
1210 }
1211
1212 pub fn arg(mut self, arg: impl Into<String>) -> Self {
1213 self.args.push(arg.into());
1214 self
1215 }
1216
1217 pub fn args<I, S>(mut self, args: I) -> Self
1218 where
1219 I: IntoIterator<Item = S>,
1220 S: Into<String>,
1221 {
1222 for arg in args {
1223 self.args.push(arg.into());
1224 }
1225 self
1226 }
1227
1228 pub fn disable_default_args(mut self) -> Self {
1229 self.disable_default_args = true;
1230 self
1231 }
1232
1233 pub fn enable_request_intercept(mut self) -> Self {
1234 self.request_intercept = true;
1235 self
1236 }
1237
1238 pub fn disable_request_intercept(mut self) -> Self {
1239 self.request_intercept = false;
1240 self
1241 }
1242
1243 pub fn enable_cache(mut self) -> Self {
1244 self.cache_enabled = true;
1245 self
1246 }
1247
1248 pub fn disable_cache(mut self) -> Self {
1249 self.cache_enabled = false;
1250 self
1251 }
1252
1253 pub fn set_service_worker_enabled(mut self, bypass: bool) -> Self {
1255 self.service_worker_enabled = bypass;
1256 self
1257 }
1258
1259 pub fn set_extra_headers(
1261 mut self,
1262 headers: Option<std::collections::HashMap<String, String>>,
1263 ) -> Self {
1264 self.extra_headers = headers;
1265 self
1266 }
1267
1268 pub fn set_whitelist_patterns(mut self, whitelist_patterns: Option<Vec<String>>) -> Self {
1270 self.whitelist_patterns = whitelist_patterns;
1271 self
1272 }
1273
1274 pub fn set_blacklist_patterns(mut self, blacklist_patterns: Option<Vec<String>>) -> Self {
1276 self.blacklist_patterns = blacklist_patterns;
1277 self
1278 }
1279
1280 #[cfg(feature = "adblock")]
1283 pub fn set_adblock_filter_rules(mut self, rules: Vec<String>) -> Self {
1284 self.adblock_filter_rules = Some(rules);
1285 self
1286 }
1287
1288 pub fn channel_capacity(mut self, capacity: usize) -> Self {
1291 self.channel_capacity = capacity;
1292 self
1293 }
1294
1295 pub fn page_channel_capacity(mut self, capacity: usize) -> Self {
1304 self.page_channel_capacity = capacity;
1305 self
1306 }
1307
1308 pub fn connection_retries(mut self, retries: u32) -> Self {
1311 self.connection_retries = retries;
1312 self
1313 }
1314
1315 pub fn build(self) -> std::result::Result<BrowserConfig, String> {
1317 let executable = if let Some(e) = self.executable {
1318 e
1319 } else {
1320 detection::default_executable(self.executation_detection)?
1321 };
1322
1323 Ok(BrowserConfig {
1324 headless: self.headless,
1325 sandbox: self.sandbox,
1326 window_size: self.window_size,
1327 port: self.port,
1328 executable,
1329 extensions: self.extensions,
1330 process_envs: self.process_envs,
1331 user_data_dir: self.user_data_dir,
1332 incognito: self.incognito,
1333 launch_timeout: self.launch_timeout,
1334 ignore_https_errors: self.ignore_https_errors,
1335 viewport: self.viewport,
1336 request_timeout: self.request_timeout,
1337 args: self.args,
1338 disable_default_args: self.disable_default_args,
1339 request_intercept: self.request_intercept,
1340 cache_enabled: self.cache_enabled,
1341 ignore_visuals: self.ignore_visuals,
1342 ignore_ads: self.ignore_ads,
1343 ignore_javascript: self.ignore_javascript,
1344 ignore_analytics: self.ignore_analytics,
1345 ignore_stylesheets: self.ignore_stylesheets,
1346 ignore_prefetch: self.ignore_prefetch,
1347 extra_headers: self.extra_headers,
1348 only_html: self.only_html,
1349 intercept_manager: self.intercept_manager,
1350 service_worker_enabled: self.service_worker_enabled,
1351 max_bytes_allowed: self.max_bytes_allowed,
1352 max_redirects: self.max_redirects,
1353 max_main_frame_navigations: self.max_main_frame_navigations,
1354 whitelist_patterns: self.whitelist_patterns,
1355 blacklist_patterns: self.blacklist_patterns,
1356 #[cfg(feature = "adblock")]
1357 adblock_filter_rules: self.adblock_filter_rules,
1358 channel_capacity: self.channel_capacity,
1359 page_channel_capacity: self.page_channel_capacity,
1360 connection_retries: self.connection_retries,
1361 })
1362 }
1363}
1364
1365impl BrowserConfig {
1366 pub fn launch(&self) -> io::Result<Child> {
1367 let mut cmd = async_process::Command::new(&self.executable);
1368
1369 if self.disable_default_args {
1370 cmd.args(&self.args);
1371 } else {
1372 cmd.args(DEFAULT_ARGS).args(&self.args);
1373 }
1374
1375 if !self
1376 .args
1377 .iter()
1378 .any(|arg| arg.contains("--remote-debugging-port="))
1379 {
1380 cmd.arg(format!("--remote-debugging-port={}", self.port));
1381 }
1382
1383 cmd.args(
1384 self.extensions
1385 .iter()
1386 .map(|e| format!("--load-extension={e}")),
1387 );
1388
1389 if let Some(ref user_data) = self.user_data_dir {
1390 cmd.arg(format!("--user-data-dir={}", user_data.display()));
1391 } else {
1392 cmd.arg(format!(
1396 "--user-data-dir={}",
1397 std::env::temp_dir().join("chromiumoxide-runner").display()
1398 ));
1399 }
1400
1401 if let Some((width, height)) = self.window_size {
1402 cmd.arg(format!("--window-size={width},{height}"));
1403 }
1404
1405 if !self.sandbox {
1406 cmd.args(["--no-sandbox", "--disable-setuid-sandbox"]);
1407 }
1408
1409 match self.headless {
1410 HeadlessMode::False => (),
1411 HeadlessMode::True => {
1412 cmd.args(["--headless", "--hide-scrollbars", "--mute-audio"]);
1413 }
1414 HeadlessMode::New => {
1415 cmd.args(["--headless=new", "--hide-scrollbars", "--mute-audio"]);
1416 }
1417 }
1418
1419 if self.incognito {
1420 cmd.arg("--incognito");
1421 }
1422
1423 if let Some(ref envs) = self.process_envs {
1424 cmd.envs(envs);
1425 }
1426 cmd.stderr(Stdio::piped()).spawn()
1427 }
1428}
1429
1430#[deprecated(note = "Use detection::default_executable instead")]
1439pub fn default_executable() -> Result<std::path::PathBuf, String> {
1440 let options = DetectionOptions {
1441 msedge: false,
1442 unstable: false,
1443 };
1444 detection::default_executable(options)
1445}
1446
/// Command-line arguments passed to every launched browser unless
/// `disable_default_args` is set on the configuration.
///
/// They are placed before the user-supplied arguments on the command line.
static DEFAULT_ARGS: [&str; 26] = [
    "--disable-background-networking",
    "--enable-features=NetworkService,NetworkServiceInProcess",
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-breakpad",
    "--disable-client-side-phishing-detection",
    "--disable-component-extensions-with-background-pages",
    "--disable-default-apps",
    "--disable-dev-shm-usage",
    "--disable-extensions",
    "--disable-features=TranslateUI",
    "--disable-hang-monitor",
    "--disable-ipc-flooding-protection",
    "--disable-popup-blocking",
    "--disable-prompt-on-repost",
    "--disable-renderer-backgrounding",
    "--disable-sync",
    "--force-color-profile=srgb",
    "--metrics-recording-only",
    "--no-first-run",
    "--enable-automation",
    "--password-store=basic",
    "--use-mock-keychain",
    "--enable-blink-features=IdleDetection",
    "--lang=en_US",
    "--disable-blink-features=AutomationControlled",
];