1use crate::utils::trie::Trie;
2
3#[cfg(feature = "chrome")]
4use chromiumoxide::handler::blockers::NetworkInterceptManager;
5
/// Stand-in for the chromiumoxide `NetworkInterceptManager`, compiled only
/// when the `chrome` feature is disabled.
#[cfg(not(feature = "chrome"))]
#[derive(Debug, Default, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum NetworkInterceptManager {
    /// The single variant of the stand-in; also the default value.
    #[default]
    Unknown,
}
15
16#[cfg(not(feature = "chrome"))]
17impl NetworkInterceptManager {
18 pub fn new(_url: &Option<Box<url::Url>>) -> NetworkInterceptManager {
20 NetworkInterceptManager::Unknown
21 }
22}
23
/// Configuration for waiting on an idle network.
#[derive(Debug, Default, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct WaitForIdleNetwork {
    /// Optional timeout for the wait; `None` leaves it unset.
    pub timeout: Option<core::time::Duration>,
}

impl WaitForIdleNetwork {
    /// Create the configuration with the given optional timeout.
    pub fn new(timeout: Option<core::time::Duration>) -> Self {
        WaitForIdleNetwork { timeout }
    }
}
38
/// Configuration for waiting on a selector.
#[derive(Debug, Default, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct WaitForSelector {
    /// Optional timeout for the wait; `None` leaves it unset.
    pub timeout: Option<core::time::Duration>,
    /// The selector to wait for.
    pub selector: String,
}

impl WaitForSelector {
    /// Create the configuration from an optional timeout and a selector.
    pub fn new(timeout: Option<core::time::Duration>, selector: String) -> Self {
        WaitForSelector { timeout, selector }
    }
}
55
/// Configuration for a delay-based wait.
#[derive(Debug, Default, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct WaitForDelay {
    /// Optional duration associated with the delay; `None` leaves it unset.
    pub timeout: Option<core::time::Duration>,
}

impl WaitForDelay {
    /// Create the configuration with the given optional duration.
    pub fn new(timeout: Option<core::time::Duration>) -> Self {
        WaitForDelay { timeout }
    }
}
70
71#[derive(Debug, Default, Clone, PartialEq)]
72#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
73pub struct WaitFor {
75 pub selector: Option<WaitForSelector>,
77 pub idle_network: Option<WaitForIdleNetwork>,
79 pub idle_network0: Option<WaitForIdleNetwork>,
81 pub almost_idle_network0: Option<WaitForIdleNetwork>,
83 pub delay: Option<WaitForDelay>,
85 pub dom: Option<WaitForSelector>,
87 #[cfg_attr(feature = "serde", serde(default))]
88 pub page_navigations: bool,
90}
91
92impl WaitFor {
93 pub fn new(
95 timeout: Option<core::time::Duration>,
96 delay: Option<WaitForDelay>,
97 page_navigations: bool,
98 idle_network: bool,
99 selector: Option<String>,
100 dom: Option<WaitForSelector>,
101 ) -> Self {
102 Self {
103 page_navigations,
104 idle_network: if idle_network {
105 Some(WaitForIdleNetwork::new(timeout))
106 } else {
107 None
108 },
109 idle_network0: None,
110 almost_idle_network0: None,
111 selector: if selector.is_some() {
112 Some(WaitForSelector::new(timeout, selector.unwrap_or_default()))
113 } else {
114 None
115 },
116 delay,
117 dom,
118 }
119 }
120}
121
/// Image format for a captured screenshot. Defaults to [`CaptureScreenshotFormat::Png`].
#[derive(
    Debug, Clone, PartialEq, Eq, Hash, Default, strum::EnumString, strum::Display, strum::AsRefStr,
)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum CaptureScreenshotFormat {
    /// JPEG; serialized by serde as `"jpeg"`.
    #[cfg_attr(feature = "serde", serde(rename = "jpeg"))]
    Jpeg,
    /// PNG (the default); serialized by serde as `"png"`.
    #[cfg_attr(feature = "serde", serde(rename = "png"))]
    #[default]
    Png,
    /// WebP; serialized by serde as `"webp"`.
    #[cfg_attr(feature = "serde", serde(rename = "webp"))]
    Webp,
}
139
#[cfg(feature = "chrome")]
impl From<CaptureScreenshotFormat>
    for chromiumoxide::cdp::browser_protocol::page::CaptureScreenshotFormat
{
    /// Map each local format variant onto the chromiumoxide variant of the same name.
    fn from(format: CaptureScreenshotFormat) -> Self {
        match format {
            CaptureScreenshotFormat::Jpeg => Self::Jpeg,
            CaptureScreenshotFormat::Png => Self::Png,
            CaptureScreenshotFormat::Webp => Self::Webp,
        }
    }
}
158
/// Viewport description: dimensions plus emulation flags.
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Viewport {
    /// Viewport width.
    pub width: u32,
    /// Viewport height.
    pub height: u32,
    /// Optional device scale factor.
    pub device_scale_factor: Option<f64>,
    /// Whether a mobile device is being emulated.
    pub emulating_mobile: bool,
    /// Whether the viewport is in landscape orientation.
    pub is_landscape: bool,
    /// Whether the viewport reports touch support.
    pub has_touch: bool,
}

impl Default for Viewport {
    /// An 800x600 viewport with no scale factor and every flag turned off.
    fn default() -> Self {
        Self {
            width: 800,
            height: 600,
            device_scale_factor: None,
            emulating_mobile: false,
            is_landscape: false,
            has_touch: false,
        }
    }
}

impl Viewport {
    /// Create a viewport of the given size; all other fields take their defaults.
    pub fn new(width: u32, height: u32) -> Self {
        Self {
            width,
            height,
            ..Self::default()
        }
    }

    /// Set the mobile-emulation flag.
    pub fn set_mobile(&mut self, emulating_mobile: bool) {
        self.emulating_mobile = emulating_mobile;
    }

    /// Set the landscape flag.
    pub fn set_landscape(&mut self, is_landscape: bool) {
        self.is_landscape = is_landscape;
    }

    /// Set the touch flag.
    pub fn set_touch(&mut self, has_touch: bool) {
        self.has_touch = has_touch;
    }

    /// Set (or clear, with `None`) the device scale factor.
    pub fn set_scale_factor(&mut self, device_scale_factor: Option<f64>) {
        self.device_scale_factor = device_scale_factor;
    }
}
216
#[cfg(feature = "chrome")]
impl From<Viewport> for chromiumoxide::handler::viewport::Viewport {
    /// Copy every field across, one to one.
    fn from(viewport: Viewport) -> Self {
        let Viewport {
            width,
            height,
            device_scale_factor,
            emulating_mobile,
            is_landscape,
            has_touch,
        } = viewport;

        Self {
            width,
            height,
            device_scale_factor,
            emulating_mobile,
            is_landscape,
            has_touch,
        }
    }
}
230
231impl From<Viewport> for spider_fingerprint::spoof_viewport::Viewport {
232 fn from(viewport: Viewport) -> Self {
233 Self {
234 width: viewport.width,
235 height: viewport.height,
236 device_scale_factor: viewport.device_scale_factor,
237 emulating_mobile: viewport.emulating_mobile,
238 is_landscape: viewport.is_landscape,
239 has_touch: viewport.has_touch,
240 }
241 }
242}
243
244#[doc = "Capture page screenshot.\n[captureScreenshot](https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-captureScreenshot)"]
245#[derive(Debug, Clone, Default, PartialEq)]
246#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
247pub struct CaptureScreenshotParams {
248 #[doc = "Image compression format (defaults to png)."]
249 pub format: Option<CaptureScreenshotFormat>,
250 #[doc = "Compression quality from range [0..100] (jpeg only)."]
251 pub quality: Option<i64>,
252 #[doc = "Capture the screenshot of a given region only."]
253 pub clip: Option<ClipViewport>,
254 #[doc = "Capture the screenshot from the surface, rather than the view. Defaults to true."]
255 pub from_surface: Option<bool>,
256 #[doc = "Capture the screenshot beyond the viewport. Defaults to false."]
257 pub capture_beyond_viewport: Option<bool>,
258}
259
/// Rectangular region (plus page scale) used to clip a screenshot.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ClipViewport {
    /// X offset in device independent pixels (dip).
    #[cfg_attr(feature = "serde", serde(rename = "x"))]
    pub x: f64,
    /// Y offset in device independent pixels (dip).
    #[cfg_attr(feature = "serde", serde(rename = "y"))]
    pub y: f64,
    /// Rectangle width in device independent pixels (dip).
    #[cfg_attr(feature = "serde", serde(rename = "width"))]
    pub width: f64,
    /// Rectangle height in device independent pixels (dip).
    #[cfg_attr(feature = "serde", serde(rename = "height"))]
    pub height: f64,
    /// Page scale factor.
    #[cfg_attr(feature = "serde", serde(rename = "scale"))]
    pub scale: f64,
}
280
#[cfg(feature = "chrome")]
impl From<ClipViewport> for chromiumoxide::cdp::browser_protocol::page::Viewport {
    /// Copy the clip rectangle and scale across, one to one.
    fn from(viewport: ClipViewport) -> Self {
        let ClipViewport {
            x,
            y,
            width,
            height,
            scale,
        } = viewport;

        Self {
            x,
            y,
            height,
            width,
            scale,
        }
    }
}
293
294#[derive(Debug, Default, Clone, PartialEq)]
296#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
297pub struct ScreenShotConfig {
298 pub params: ScreenshotParams,
300 pub bytes: bool,
302 pub save: bool,
304 pub output_dir: Option<std::path::PathBuf>,
306}
307
308impl ScreenShotConfig {
309 pub fn new(
311 params: ScreenshotParams,
312 bytes: bool,
313 save: bool,
314 output_dir: Option<std::path::PathBuf>,
315 ) -> Self {
316 Self {
317 params,
318 bytes,
319 save,
320 output_dir,
321 }
322 }
323}
324
325#[derive(Default, Debug, Clone, PartialEq)]
327#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
328pub struct ScreenshotParams {
329 pub cdp_params: CaptureScreenshotParams,
331 pub full_page: Option<bool>,
333 pub omit_background: Option<bool>,
335}
336
337impl ScreenshotParams {
338 pub fn new(
340 cdp_params: CaptureScreenshotParams,
341 full_page: Option<bool>,
342 omit_background: Option<bool>,
343 ) -> Self {
344 Self {
345 cdp_params,
346 full_page,
347 omit_background,
348 }
349 }
350}
351
#[cfg(feature = "chrome")]
impl From<ScreenshotParams> for chromiumoxide::page::ScreenshotParams {
    /// Convert into chromiumoxide screenshot parameters.
    ///
    /// Fallbacks used when a value is not set explicitly:
    /// * `full_page` - `true` only if the `SCREENSHOT_FULL_PAGE` environment
    ///   variable equals `"true"`.
    /// * `omit_background` - `true` if `SCREENSHOT_OMIT_BACKGROUND` is unset or
    ///   unreadable, otherwise whether it equals `"true"`.
    /// * `format` - PNG.
    ///
    /// `quality`, `clip`, `capture_beyond_viewport` and `from_surface` are only
    /// forwarded to the builder when they are `Some`.
    fn from(params: ScreenshotParams) -> Self {
        use chromiumoxide::cdp::browser_protocol::page::{
            CaptureScreenshotFormat as CdpFormat, Viewport as CdpViewport,
        };

        let ScreenshotParams {
            cdp_params,
            full_page,
            omit_background,
        } = params;

        let full_page = full_page.unwrap_or_else(|| {
            std::env::var("SCREENSHOT_FULL_PAGE").unwrap_or_default() == "true"
        });
        let omit_background = omit_background.unwrap_or_else(|| {
            std::env::var("SCREENSHOT_OMIT_BACKGROUND").map_or(true, |value| value == "true")
        });
        let format = cdp_params.format.map_or(CdpFormat::Png, CdpFormat::from);

        let mut builder = chromiumoxide::page::ScreenshotParams::builder()
            .format(format)
            .full_page(full_page)
            .omit_background(omit_background);

        if let Some(quality) = cdp_params.quality {
            builder = builder.quality(quality);
        }
        if let Some(clip) = cdp_params.clip {
            builder = builder.clip(CdpViewport::from(clip));
        }
        if let Some(capture_beyond_viewport) = cdp_params.capture_beyond_viewport {
            builder = builder.capture_beyond_viewport(capture_beyond_viewport);
        }
        if let Some(from_surface) = cdp_params.from_surface {
            builder = builder.from_surface(from_surface);
        }

        builder.build()
    }
}
422
/// The decision on what to do in response to the authorization challenge. Default means
/// deferring to the default behavior of the net stack, which will likely either cancel
/// authentication or display a popup dialog box.
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum AuthChallengeResponseResponse {
    /// Defer to the default behavior of the net stack.
    #[default]
    Default,
    /// Cancel the authentication.
    CancelAuth,
    /// Answer the challenge with the supplied credentials.
    ProvideCredentials,
}
435
436#[doc = "Response to an AuthChallenge.\n[AuthChallengeResponse](https://chromedevtools.github.io/devtools-protocol/tot/Fetch/#type-AuthChallengeResponse)"]
437#[derive(Default, Debug, Clone, PartialEq)]
438#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
439pub struct AuthChallengeResponse {
440 #[doc = "The decision on what to do in response to the authorization challenge. Default means\ndeferring to the default behavior of the net stack, which will likely either the Cancel\nauthentication or display a popup dialog box."]
441 pub response: AuthChallengeResponseResponse,
442 #[doc = "The username to provide, possibly empty. Should only be set if response is\nProvideCredentials."]
443 pub username: Option<String>,
444 #[doc = "The password to provide, possibly empty. Should only be set if response is\nProvideCredentials."]
445 pub password: Option<String>,
446}
447
#[cfg(feature = "chrome")]
impl From<AuthChallengeResponse>
    for chromiumoxide::cdp::browser_protocol::fetch::AuthChallengeResponse
{
    /// Map the decision onto the chromiumoxide variant of the same name and
    /// move the credentials across unchanged.
    fn from(auth_challenge_response: AuthChallengeResponse) -> Self {
        use chromiumoxide::cdp::browser_protocol::fetch::AuthChallengeResponseResponse as CdpDecision;

        let AuthChallengeResponse {
            response,
            username,
            password,
        } = auth_challenge_response;

        let response = match response {
            AuthChallengeResponseResponse::CancelAuth => CdpDecision::CancelAuth,
            AuthChallengeResponseResponse::ProvideCredentials => CdpDecision::ProvideCredentials,
            AuthChallengeResponseResponse::Default => CdpDecision::Default,
        };

        Self {
            response,
            username,
            password,
        }
    }
}
465
/// A single browser automation step.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum WebAutomation {
    /// Evaluate the given JavaScript.
    Evaluate(String),
    /// Click the element matching the selector.
    Click(String),
    /// Click every element matching the selector.
    ClickAll(String),
    /// Click at a point.
    ClickPoint {
        /// X coordinate.
        x: f64,
        /// Y coordinate.
        y: f64,
    },
    /// Click and hold the element matching the selector.
    ClickHold {
        /// Selector of the element to hold.
        selector: String,
        /// Hold duration in milliseconds.
        hold_ms: u64,
    },
    /// Click and hold at a point.
    ClickHoldPoint {
        /// X coordinate.
        x: f64,
        /// Y coordinate.
        y: f64,
        /// Hold duration in milliseconds.
        hold_ms: u64,
    },
    /// Drag from one element to another.
    ClickDrag {
        /// Selector of the element the drag starts on.
        from: String,
        /// Selector of the element the drag ends on.
        to: String,
        /// Optional modifier value applied to the drag.
        modifier: Option<i64>,
    },

    /// Drag from one point to another.
    ClickDragPoint {
        /// X coordinate of the start point.
        from_x: f64,
        /// Y coordinate of the start point.
        from_y: f64,
        /// X coordinate of the end point.
        to_x: f64,
        /// Y coordinate of the end point.
        to_y: f64,
        /// Optional modifier value applied to the drag.
        modifier: Option<i64>,
    },
    /// Click every clickable element.
    ClickAllClickable(),
    /// Wait for the given number of milliseconds.
    Wait(u64),
    /// Wait for a navigation.
    WaitForNavigation,
    /// Wait for the page load.
    WaitForLoad {
        /// Timeout in milliseconds.
        timeout: u64,
    },
    /// Wait for the network to become idle.
    WaitForNetworkIdle {
        /// Timeout in milliseconds.
        timeout: u64,
    },
    /// Wait for the network to become almost idle.
    WaitForNetworkAlmostIdle {
        /// Timeout in milliseconds.
        timeout: u64,
    },
    /// Wait on the DOM, optionally scoped to a selector.
    WaitForDom {
        /// Optional selector.
        selector: Option<String>,
        /// Timeout in milliseconds.
        timeout: u32,
    },
    /// Wait for the selector.
    WaitFor(String),
    /// Wait for the selector, with an explicit timeout.
    WaitForWithTimeout {
        /// Selector to wait for.
        selector: String,
        /// Timeout in milliseconds.
        timeout: u64,
    },
    /// Wait for the selector, then click the matching element.
    WaitForAndClick(String),
    /// Scroll horizontally by the given amount.
    ScrollX(i32),
    /// Scroll vertically by the given amount.
    ScrollY(i32),
    /// Fill the element matching the selector with a value.
    Fill {
        /// Selector of the element to fill.
        selector: String,
        /// Text to enter.
        value: String,
    },
    /// Type text on the page.
    Type {
        /// Text to type.
        value: String,
        /// Optional modifier value applied while typing.
        modifier: Option<i64>,
    },
    /// Scroll the page repeatedly; the value is handed to the scroll script as its timeout.
    InfiniteScroll(u32),
    /// Take a screenshot.
    Screenshot {
        /// Capture the full page.
        full_page: bool,
        /// Omit the page background.
        omit_background: bool,
        /// Output location for the image.
        output: String,
    },
    /// Marker step used to validate the preceding steps of a chain.
    ValidateChain,
}

impl WebAutomation {
    /// The variant name as a static string, without any payload.
    pub fn name(&self) -> &'static str {
        match self {
            Self::Evaluate(_) => "Evaluate",
            Self::Click(_) => "Click",
            Self::ClickAll(_) => "ClickAll",
            Self::ClickPoint { .. } => "ClickPoint",
            Self::ClickHold { .. } => "ClickHold",
            Self::ClickHoldPoint { .. } => "ClickHoldPoint",
            Self::ClickDrag { .. } => "ClickDrag",
            Self::ClickDragPoint { .. } => "ClickDragPoint",
            Self::ClickAllClickable() => "ClickAllClickable",
            Self::Wait(_) => "Wait",
            Self::WaitForNavigation => "WaitForNavigation",
            Self::WaitForLoad { .. } => "WaitForLoad",
            Self::WaitForNetworkIdle { .. } => "WaitForNetworkIdle",
            Self::WaitForNetworkAlmostIdle { .. } => "WaitForNetworkAlmostIdle",
            Self::WaitForDom { .. } => "WaitForDom",
            Self::WaitFor(_) => "WaitFor",
            Self::WaitForWithTimeout { .. } => "WaitForWithTimeout",
            Self::WaitForAndClick(_) => "WaitForAndClick",
            Self::ScrollX(_) => "ScrollX",
            Self::ScrollY(_) => "ScrollY",
            Self::Fill { .. } => "Fill",
            Self::Type { .. } => "Type",
            Self::InfiniteScroll(_) => "InfiniteScroll",
            Self::Screenshot { .. } => "Screenshot",
            Self::ValidateChain => "ValidateChain",
        }
    }

    /// A human-readable description of the step, including its main arguments.
    ///
    /// The script of `Evaluate` and the value of `Fill` are left out of the label.
    pub fn label(&self) -> String {
        match self {
            Self::Evaluate(_) => String::from("Evaluate JS"),
            // For these payload-free steps the label is exactly the variant name.
            Self::ClickAllClickable() | Self::WaitForNavigation | Self::ValidateChain => {
                self.name().to_owned()
            }
            Self::Click(selector) => format!("Click {selector}"),
            Self::ClickAll(selector) => format!("ClickAll {selector}"),
            Self::ClickPoint { x, y } => format!("ClickPoint x:{x} y:{y}"),
            Self::ClickHold { selector, hold_ms } => {
                format!("ClickHold {selector} ({hold_ms}ms)")
            }
            Self::ClickHoldPoint { x, y, hold_ms } => {
                format!("ClickHoldPoint x:{x} y:{y} ({hold_ms}ms)")
            }
            Self::ClickDrag { from, to, modifier } => {
                format!("ClickDrag {from} -> {to} modifier={modifier:?}")
            }
            Self::ClickDragPoint {
                from_x,
                from_y,
                to_x,
                to_y,
                modifier,
            } => {
                format!("ClickDragPoint ({from_x},{from_y}) -> ({to_x},{to_y}) modifier={modifier:?}")
            }
            Self::Wait(ms) => format!("Wait {ms}ms"),
            Self::WaitForLoad { timeout } => format!("WaitForLoad ({timeout}ms)"),
            Self::WaitForNetworkIdle { timeout } => format!("WaitForNetworkIdle ({timeout}ms)"),
            Self::WaitForNetworkAlmostIdle { timeout } => {
                format!("WaitForNetworkAlmostIdle ({timeout}ms)")
            }
            Self::WaitForDom { selector, timeout } => match selector {
                Some(selector) => format!("WaitForDom {selector} ({timeout}ms)"),
                None => format!("WaitForDom ({timeout}ms)"),
            },
            Self::WaitFor(selector) => format!("WaitFor {selector}"),
            Self::WaitForWithTimeout { selector, timeout } => {
                format!("WaitForWithTimeout {selector} ({timeout}ms)")
            }
            Self::WaitForAndClick(selector) => format!("WaitForAndClick {selector}"),
            Self::ScrollX(dx) => format!("ScrollX {dx}"),
            Self::ScrollY(dy) => format!("ScrollY {dy}"),
            Self::Fill { selector, .. } => format!("Fill {selector}"),
            Self::Type { value, modifier } => format!("Type {value} modifier={modifier:?}"),
            Self::InfiniteScroll(n) => format!("InfiniteScroll {n}"),
            Self::Screenshot {
                full_page,
                omit_background,
                output,
            } => format!("Screenshot full={full_page} omit_bg={omit_background} -> {output}"),
        }
    }
}

impl core::fmt::Display for WebAutomation {
    /// Writes the same text as [`WebAutomation::label`].
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let text = self.label();
        f.write_str(text.as_str())
    }
}
696
/// Build the JavaScript for the frame-based DOM wait.
///
/// The generated script returns a promise that resolves to `true` once, for
/// `stable_frames` consecutive animation frames, the document is complete,
/// fonts are loaded, the image check passes, the visibility check passes and
/// no mutation was observed on the target. It resolves to `false` when
/// `timeout_ms` elapses first.
///
/// * `timeout_ms` - capped at `crate::utils::FIVE_MINUTES`.
/// * `selector` - target selector; an empty string means `"body"`. The script
///   itself falls back to a fixed list of common root selectors if it matches nothing.
/// * `_quiet_ms` - unused.
/// * `stable_frames` - clamped to the range 1..=10.
/// * `require_visible` - whether the target must be visible.
/// * `require_images_complete` - whether all document images must be complete.
#[cfg(feature = "chrome")]
pub(crate) fn generate_wait_for_dom_js_v2(
    timeout_ms: u32,
    selector: &str,
    _quiet_ms: u32,
    stable_frames: u32,
    require_visible: bool,
    require_images_complete: bool,
) -> String {
    let capped_timeout = timeout_ms.min(crate::utils::FIVE_MINUTES);
    let frame_count = stable_frames.clamp(1, 10);
    let target = match selector {
        "" => "body",
        other => other,
    };

    // Booleans format as `true` / `false`, which is exactly what the script expects.
    format!(
        r###"(()=>new Promise(R=>{{const S={s:?},T={t},F={f},V={vis},I={img},P=["#__next","#__nuxt","#app","#root","main","body"],N=()=>performance.now(),W=e=>{{if(!e)return!1;const t=getComputedStyle(e);if("none"===t.display||"hidden"===t.visibility||"0"===t.opacity)return!1;const r=e.getBoundingClientRect();return r.width>0&&r.height>0}},M=()=>{{if(!I)return!0;const t=document.images;for(let i=0;i<t.length;i++){{const r=t[i];if(!r.complete)return!1;if(0===r.naturalWidth&&0===r.naturalHeight&&r.currentSrc)return!1}}return!0}},k=()=>{{let e=document.querySelector(S);if(e)return e;for(let t=0;t<P.length;t++){{if(e=document.querySelector(P[t]))return e}}return null}},s=N();let e=null,ob=null,dirty=!1,frames=0;const arm=el=>{{ob&&ob.disconnect();ob=new MutationObserver(()=>{{dirty=!0}});ob.observe(el,{{subtree:!0,childList:!0,attributes:!0,characterData:!0}})}};const begin=()=>{{e=k();if(!e){{requestAnimationFrame(begin);return}}arm(e);const tick=()=>{{const n=N();if(n-s>=T){{ob&&ob.disconnect();return R(!1)}}if(!document.contains(e)){{e=k();if(!e){{requestAnimationFrame(tick);return}}arm(e);frames=0;dirty=!1}}const wasMut=dirty;dirty=!1;if(wasMut){{frames=0;requestAnimationFrame(tick);return}}const docReady="complete"===document.readyState;const fontsReady=!document.fonts||"loaded"===document.fonts.status;const imagesReady=M();const vis=!V||W(e);if(docReady&&fontsReady&&imagesReady&&vis){{frames++;if(frames>=F){{ob.disconnect();return R(!0)}}}}else{{frames=0}}requestAnimationFrame(tick)}};requestAnimationFrame(tick)}};if("complete"===document.readyState)begin();else addEventListener("load",begin,{{once:!0}})}}))()"###,
        s = target,
        t = capped_timeout,
        f = frame_count,
        vis = require_visible,
        img = require_images_complete,
    )
}
753
/// Build the JavaScript for the mutation-quiet DOM wait.
///
/// The generated script returns a promise that resolves to `true` once the
/// element matching `selector` has gone 220ms without an observed mutation
/// (checked every 50ms). It rejects with `Dom Timeout.` when `timeout`
/// milliseconds pass after the element was found.
///
/// * `timeout` - milliseconds, capped at `crate::utils::FIVE_MINUTES`.
/// * `selector` - selector of the element to watch; `None` means `"body"`.
///
/// NOTE(review): the script arms its timeout only after the element has been
/// found, so a selector that never matches keeps polling until the caller
/// gives up.
#[cfg(feature = "chrome")]
pub(crate) fn generate_wait_for_dom_js_code_with_selector(
    timeout: u32,
    selector: Option<&str>,
) -> String {
    let t = timeout.min(crate::utils::FIVE_MINUTES);
    let s = selector.unwrap_or("body");
    // The selector is emitted as a double-quoted, escaped literal (`{s:?}`),
    // matching `generate_wait_for_dom_js_v2`. Splicing it raw between single
    // quotes broke the script for any selector containing `'`, such as
    // `input[type='button']`.
    format!(
        "new Promise((r,j)=>{{const s={s:?},t={t},i=220,n=50;let l=Date.now(),el,o,d,c;const check=()=>{{el=document.querySelector(s);if(!el)return;clearInterval(wait);l=Date.now();o=new MutationObserver(()=>{{l=Date.now();}});o.observe(el,{{childList:!0,subtree:!0,attributes:!0,characterData:!0}});d=setTimeout(()=>{{clearInterval(c),o.disconnect(),j(new Error('Dom Timeout.'))}},t);c=setInterval(()=>{{Date.now()-l>=i&&(clearTimeout(d),clearInterval(c),o.disconnect(),r(!0))}},n);}};const wait=setInterval(check,n);check();}});",
        s = s,
        t = t
    )
}
768
/// Comma-separated CSS selector list used by `WebAutomation::ClickAllClickable`
/// to find the elements to click.
///
/// It covers enabled buttons and button-like inputs, checkboxes and radios,
/// `label[for]`, `summary`, enabled `select` / `textarea`, `[role='button']`
/// not marked `aria-disabled`, `[onclick]`, editable content, and elements
/// with a `tabindex`.
///
/// NOTE(review): as written, the last entry excludes both negative `tabindex`
/// values and `a[href]` from the `[tabindex]` match — confirm that excluding
/// links is intended.
#[cfg(feature = "chrome")]
const CLICKABLE_SELECTOR: &str = concat!(
    "button:not([disabled]),",
    "input[type='button']:not([disabled]),",
    "input[type='submit']:not([disabled]),",
    "input[type='reset']:not([disabled]),",
    "input[type='checkbox']:not([disabled]),",
    "input[type='radio']:not([disabled]),",
    "label[for],",
    "summary,",
    "select:not([disabled]),",
    "textarea:not([disabled]),",
    "[role='button']:not([aria-disabled='true']),",
    "[onclick],",
    "[contenteditable=''],[contenteditable='true'],",
    "[tabindex]:not([tabindex^='-'],a[href])"
);
803
804impl WebAutomation {
805 #[cfg(feature = "chrome")]
806 pub async fn run(&self, page: &chromiumoxide::Page) -> bool {
808 use crate::utils::wait_for_selector;
809 use std::time::Duration;
810
811 let mut valid = false;
812
813 match self {
814 WebAutomation::Evaluate(js) => {
815 valid = page.evaluate(js.as_str()).await.is_ok();
816 }
817 WebAutomation::Click(selector) => {
818 if let Ok(ele) = page.find_element(selector).await {
819 valid = ele.click().await.is_ok();
820 }
821 }
822
823 WebAutomation::WaitForWithTimeout { selector, timeout } => {
824 valid =
825 wait_for_selector(page, Some(Duration::from_millis(*timeout)), selector).await;
826 }
827 WebAutomation::Wait(ms) => {
828 tokio::time::sleep(Duration::from_millis(*ms)).await;
829 valid = true;
830 }
831 WebAutomation::ClickHold { selector, hold_ms } => {
832 if let Ok(ele) = page.find_element(selector).await {
833 if let Ok(pt) = ele.clickable_point().await {
834 valid = page
835 .click_and_hold(pt, Duration::from_millis(*hold_ms))
836 .await
837 .is_ok();
838 }
839 }
840 }
841 WebAutomation::ClickHoldPoint { x, y, hold_ms } => {
842 let pt = chromiumoxide::layout::Point { x: *x, y: *y };
843 valid = page
844 .click_and_hold(pt, Duration::from_millis(*hold_ms))
845 .await
846 .is_ok();
847 }
848 WebAutomation::ClickAll(selector) => {
849 if let Ok(eles) = page.find_elements(selector).await {
850 for ele in eles {
851 valid = ele.click().await.is_ok();
852 }
853 }
854 }
855 WebAutomation::ClickDrag { from, to, modifier } => {
856 if let (Ok(from_el), Ok(to_el)) =
857 (page.find_element(from).await, page.find_element(to).await)
858 {
859 if let (Ok(p1), Ok(p2)) = (
860 from_el.clickable_point().await,
861 to_el.clickable_point().await,
862 ) {
863 valid = match modifier {
864 Some(m) => page.click_and_drag_with_modifier(p1, p2, *m).await.is_ok(),
865 None => page.click_and_drag(p1, p2).await.is_ok(),
866 };
867 }
868 }
869 }
870
871 WebAutomation::ClickDragPoint {
872 from_x,
873 from_y,
874 to_x,
875 to_y,
876 modifier,
877 } => {
878 let p1 = chromiumoxide::layout::Point {
879 x: *from_x,
880 y: *from_y,
881 };
882 let p2 = chromiumoxide::layout::Point { x: *to_x, y: *to_y };
883
884 valid = match modifier {
885 Some(m) => page.click_and_drag_with_modifier(p1, p2, *m).await.is_ok(),
886 None => page.click_and_drag(p1, p2).await.is_ok(),
887 };
888 }
889 WebAutomation::ClickAllClickable() => {
890 if let Ok(eles) = page.find_elements(CLICKABLE_SELECTOR).await {
891 for ele in eles {
892 valid = ele.click().await.is_ok();
893 }
894 }
895 }
896 WebAutomation::ClickPoint { x, y } => {
897 valid = page
898 .click(chromiumoxide::layout::Point { x: *x, y: *y })
899 .await
900 .is_ok();
901 }
902 WebAutomation::WaitForDom { selector, timeout } => {
903 valid = page
904 .evaluate(
905 generate_wait_for_dom_js_code_with_selector(*timeout, selector.as_deref())
906 .as_str(),
907 )
908 .await
909 .is_ok();
910 }
911 WebAutomation::WaitFor(selector) => {
912 valid = wait_for_selector(page, Some(Duration::from_secs(60)), selector).await;
913 }
914 WebAutomation::WaitForNavigation => {
915 valid = page.wait_for_navigation().await.is_ok();
916 }
917 WebAutomation::WaitForLoad { timeout } => {
918 crate::utils::wait_for_event::<
919 chromiumoxide::cdp::browser_protocol::page::EventLoadEventFired,
920 >(page, Some(Duration::from_millis(*timeout)))
921 .await;
922 valid = true;
923 }
924 WebAutomation::WaitForNetworkIdle { timeout } => {
925 let dur = Duration::from_millis(*timeout);
926 valid = page.wait_for_network_idle_with_timeout(dur).await.is_ok();
927 }
928 WebAutomation::WaitForNetworkAlmostIdle { timeout } => {
929 let dur = Duration::from_millis(*timeout);
930 valid = page
931 .wait_for_network_almost_idle_with_timeout(dur)
932 .await
933 .is_ok();
934 }
935 WebAutomation::WaitForAndClick(selector) => {
936 valid = wait_for_selector(page, Some(Duration::from_secs(60)), selector).await;
937 if let Ok(ele) = page.find_element(selector).await {
938 valid = ele.click().await.is_ok();
939 }
940 }
941 WebAutomation::ScrollX(px) => {
942 valid = page
943 .scroll_by((*px as f32).into(), 0.0, Default::default())
944 .await
945 .is_ok()
946 }
947 WebAutomation::ScrollY(px) => {
948 valid = page
949 .scroll_by(0.0, (*px as f32).into(), Default::default())
950 .await
951 .is_ok()
952 }
953 WebAutomation::Fill { selector, value } => {
954 if let Ok(ele) = page.find_element(selector).await {
955 if let Ok(el) = ele.click().await {
956 valid = el.type_str(value).await.is_ok();
957 }
958 }
959 }
960 WebAutomation::Type { value, modifier } => {
961 valid = page.type_str_with_modifier(value, *modifier).await.is_ok()
962 }
963 WebAutomation::InfiniteScroll(duration) => {
964 valid = page.evaluate(set_dynamic_scroll(*duration)).await.is_ok();
965 }
966 WebAutomation::Screenshot {
967 full_page,
968 omit_background,
969 output,
970 } => {
971 let mut cdp_params: CaptureScreenshotParams = CaptureScreenshotParams::default();
972 cdp_params.format = Some(CaptureScreenshotFormat::Png);
973
974 let screenshot_params =
975 ScreenshotParams::new(cdp_params, Some(*full_page), Some(*omit_background));
976
977 valid = page
978 .save_screenshot(screenshot_params, output)
979 .await
980 .is_ok();
981 }
982 _ => (),
983 };
984
985 valid
986 }
987}
988
989pub fn set_dynamic_scroll(timeout: u32) -> String {
991 let timeout = timeout.min(crate::utils::FIVE_MINUTES);
992 let s = string_concat!(
993 r###"document.addEventListener('DOMContentLoaded',e=>{let t=null,o=null,n="###,
994 timeout.to_string(),
995 r###",a=Date.now(),i=Date.now(),r=()=>{window.scrollTo(0,document.body.scrollHeight)},l=()=>{o&&o.disconnect(),console.log('Stopped checking for new content.')},c=(e,n)=>{e.forEach(e=>{if(e.isIntersecting){i=Date.now();const n=Date.now();if(n-a>=t||n-i>=1e4)return void l();r(),t=document.querySelector('body > *:last-child'),o.observe(t)}})},s=()=>{t&&(o=new IntersectionObserver(c),o.observe(t))},d=()=>{['load','error','abort'].forEach(e=>{window.addEventListener(e,()=>{const e=document.querySelector('body > *:last-child');e!==t&&(i=Date.now(),t=e,o.observe(t))})})},u=()=>{r(),t=document.querySelector('body > *:last-child'),s(),d()};u(),setTimeout(l,n)});"###
996 );
997
998 s
999}
1000
/// Map from a path key to the script to evaluate for it.
pub type ExecutionScriptsMap = hashbrown::HashMap<String, String>;
/// Map from a path key to the list of automation steps to run for it.
pub type AutomationScriptsMap = hashbrown::HashMap<String, Vec<WebAutomation>>;

/// Trie holding one script per inserted path.
pub type ExecutionScripts = Trie<String>;
/// Trie holding one list of automation steps per inserted path.
pub type AutomationScripts = Trie<Vec<WebAutomation>>;
1010
1011#[derive(Debug, Clone, Default, PartialEq)]
1012#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1013pub struct RequestInterceptConfiguration {
1015 pub enabled: bool,
1017 pub block_visuals: bool,
1019 pub block_stylesheets: bool,
1021 pub block_javascript: bool,
1023 pub block_analytics: bool,
1025 pub block_ads: bool,
1027 pub intercept_manager: NetworkInterceptManager,
1029 pub whitelist_patterns: Option<Vec<String>>,
1031 pub blacklist_patterns: Option<Vec<String>>,
1033}
1034
1035impl RequestInterceptConfiguration {
1036 pub fn new(enabled: bool) -> RequestInterceptConfiguration {
1038 RequestInterceptConfiguration {
1039 enabled,
1040 block_javascript: false,
1041 block_visuals: true,
1042 block_analytics: true,
1043 block_stylesheets: true,
1044 block_ads: true,
1045 ..Default::default()
1046 }
1047 }
1048 pub fn new_manager(
1050 enabled: bool,
1051 url: &Option<Box<url::Url>>,
1052 ) -> RequestInterceptConfiguration {
1053 RequestInterceptConfiguration {
1054 enabled,
1055 block_javascript: false,
1056 block_visuals: true,
1057 block_analytics: true,
1058 block_stylesheets: true,
1059 intercept_manager: NetworkInterceptManager::new(url),
1060 ..Default::default()
1061 }
1062 }
1063
1064 pub fn setup_intercept_manager(&mut self, url: &Option<Box<url::Url>>) {
1066 self.intercept_manager = NetworkInterceptManager::new(url);
1067 }
1068
1069 pub fn set_whitelist_patterns(&mut self, whitelist_patterns: Option<Vec<String>>) {
1071 self.whitelist_patterns = whitelist_patterns;
1072 }
1073
1074 pub fn set_blacklist_patterns(&mut self, blacklist_patterns: Option<Vec<String>>) {
1076 self.blacklist_patterns = blacklist_patterns;
1077 }
1078
1079 pub fn block_all(&mut self) -> &Self {
1081 self.block_javascript = true;
1082 self.block_analytics = true;
1083 self.block_stylesheets = true;
1084 self.block_visuals = true;
1085 self.block_ads = true;
1086 self
1087 }
1088}
1089
1090pub fn convert_to_trie_execution_scripts(
1092 input: &Option<ExecutionScriptsMap>,
1093) -> Option<Trie<String>> {
1094 match input {
1095 Some(ref scripts) => {
1096 let mut trie = Trie::new();
1097 for (path, script) in scripts {
1098 trie.insert(path, script.clone());
1099 }
1100 Some(trie)
1101 }
1102 None => None,
1103 }
1104}
1105
1106pub fn convert_to_trie_automation_scripts(
1108 input: &Option<AutomationScriptsMap>,
1109) -> Option<Trie<Vec<WebAutomation>>> {
1110 match input {
1111 Some(ref scripts) => {
1112 let mut trie = Trie::new();
1113 for (path, script_list) in scripts {
1114 trie.insert(path, script_list.clone());
1115 }
1116 Some(trie)
1117 }
1118 None => None,
1119 }
1120}
1121
/// Evaluate the execution script registered for `target_url`, if any.
///
/// The trie is searched for `target_url` first. When nothing is found and the
/// trie has `match_all` set, the script stored on the root node (if present)
/// is evaluated instead. Evaluation errors are ignored.
#[cfg(feature = "chrome")]
pub async fn eval_execution_scripts(
    page: &chromiumoxide::Page,
    target_url: &str,
    execution_scripts: &Option<ExecutionScripts>,
) {
    let scripts = match execution_scripts {
        Some(scripts) => scripts,
        None => return,
    };

    match scripts.search(target_url) {
        Some(script) => {
            let _ = page.evaluate(script.as_str()).await;
        }
        None if scripts.match_all => {
            if let Some(script) = scripts.root.value.as_ref() {
                let _ = page.evaluate(script.as_str()).await;
            }
        }
        None => {}
    }
}
1139
/// Run one chain of automation steps in order.
///
/// Each step gets at most 60 seconds. A `ValidateChain` step stops the chain
/// when the previous step did not succeed. A step that times out is logged
/// and leaves the previous outcome unchanged.
#[cfg(feature = "chrome")]
async fn run_automation_chain(
    page: &chromiumoxide::Page,
    target_url: &str,
    scripts: &[WebAutomation],
) {
    let mut valid = false;

    for script in scripts {
        if matches!(script, WebAutomation::ValidateChain) && !valid {
            break;
        }
        match tokio::time::timeout(tokio::time::Duration::from_secs(60), script.run(page)).await {
            Ok(next) => valid = next,
            Err(elapsed) => {
                log::warn!("Script execution timed out for: {target_url} - {elapsed}")
            }
        }
    }
}

/// Run the automation steps registered for `target_url`, if any.
///
/// The trie is searched for `target_url` first. When nothing is found and the
/// trie has `match_all` set, the steps stored on the root node (if present)
/// are run instead.
#[cfg(feature = "chrome")]
pub async fn eval_automation_scripts(
    page: &chromiumoxide::Page,
    target_url: &str,
    automation_scripts: &Option<AutomationScripts>,
) {
    if let Some(script_map) = automation_scripts {
        if let Some(scripts) = script_map.search(target_url) {
            run_automation_chain(page, target_url, scripts).await;
        } else if script_map.match_all {
            if let Some(scripts) = script_map.root.value.as_ref() {
                run_automation_chain(page, target_url, scripts).await;
            }
        }
    }
}
1188
#[cfg(feature = "chrome")]
/// Placeholder symbol key embedded in [`INSTALL_OBSERVER_JS`] and
/// [`DRAIN_CHANGES_JS`]. `eval_automation_scripts_tracking` substitutes a
/// freshly generated key for this value before evaluating either script.
pub const OBSERVER_SYMBOL_KEY: &str = "obs:3f2d6c07-5c01-4e2e-9ab0-8a8b9a7b2f4c";
#[cfg(feature = "chrome")]
/// JavaScript that installs a page-change recorder and returns its handle.
///
/// The handle is stored on `globalThis` under `Symbol.for(<key>)`; if a handle
/// already exists under that key it is returned without installing again.
/// The recorder keeps a queue capped at 100 entries (oldest dropped first) and
/// appends an entry, each with a timestamp and a snapshot of URL, title,
/// viewport and scroll position, for:
/// - DOM mutations seen by a `MutationObserver` on the document element
///   (`childList` and `attributes`; up to 3 added/removed nodes per record),
/// - location changes (`pushState`, `replaceState`, `popstate`, `hashchange`),
/// - `input` / `change` events on `INPUT` and `TEXTAREA` elements,
/// - `click` events.
///
/// The handle exposes `drain()`, which returns the queued entries and empties
/// the queue.
pub const INSTALL_OBSERVER_JS: &str = r##"(function(){const K=Symbol.for("obs:3f2d6c07-5c01-4e2e-9ab0-8a8b9a7b2f4c");if(globalThis[K])return globalThis[K];const Q=[],cap=100,cssPath=e=>{if(!e||1!==e.nodeType)return "#text";const t=[];for(;e&&1===e.nodeType&&e!==document.documentElement;){let n=e.nodeName.toLowerCase();if(e.id){n+="#"+e.id,t.unshift(n);break}let r=e,o=1;for(;(r=r.previousElementSibling)!=null;)if(r.nodeName===e.nodeName)o++;t.unshift(n+":nth-of-type("+o+")"),e=e.parentElement}return t.length?t.join(">"):"html"},state=()=>({url:location.href,title:document.title,viewport:{w:innerWidth,h:innerHeight,dpr:devicePixelRatio||1},scroll:{x:pageXOffset||0,y:pageYOffset||0}}),serialize=m=>"childList"===m.type?{type:"childList",target:cssPath(m.target),added:Array.from(m.addedNodes||[]).slice(0,3).map(n=>1===n.nodeType?n.outerHTML:"#text"),removed:Array.from(m.removedNodes||[]).slice(0,3).map(n=>1===n.nodeType?n.outerHTML:"#text" )}:"attributes"===m.type?{type:"attributes",target:cssPath(m.target),attribute:m.attributeName||"",old:m.oldValue??null,new:m.attributeName?m.target.getAttribute(m.attributeName):null}:{type:m.type,target:cssPath(m.target)},push=e=>{Q.push({t:Date.now(),state:state(),mutations:e?e.map(serialize):[]}),Q.length>cap&&Q.splice(0,Q.length-cap)};new MutationObserver(l=>push(Array.from(l))).observe(document.documentElement,{subtree:!0,childList:!0,attributes:!0,characterData:!1,attributeOldValue:!0});const notify=()=>push(null);["pushState","replaceState"].forEach(k=>{const o=history[k];history[k]=function(){const r=o.apply(this,arguments);return dispatchEvent(new Event("locationchange")),r}});addEventListener("popstate",()=>dispatchEvent(new Event("locationchange")));addEventListener("hashchange",()=>dispatchEvent(new Event("locationchange")));addEventListener("locationchange",notify,{passive:!0});const emitVal=el=>{const 
now=el&&null!=el.value?""+el.value:null,old=Object.prototype.hasOwnProperty.call(el,"__oldVal")?el.__oldVal:null;el.__oldVal=now,Q.push({t:Date.now(),state:state(),action:"input",target:cssPath(el),old:old,new:now}),Q.length>cap&&Q.splice(0,Q.length-cap)};addEventListener("input",e=>{const t=e.target;if(t&&("INPUT"===t.tagName||"TEXTAREA"===t.tagName))emitVal(t)},{capture:!0,passive:!0});addEventListener("change",e=>{const t=e.target;if(t&&("INPUT"===t.tagName||"TEXTAREA"===t.tagName))emitVal(t)},{capture:!0});addEventListener("click",e=>{const el=e.target instanceof Element?e.target:null;Q.push({t:Date.now(),state:state(),action:"click",target:el?cssPath(el):null,meta:el?{tag:el.tagName,id:el.id||null,class:el.className||null,href:el.getAttribute("href")||null,text:(el.textContent||"").trim().slice(0,120)}:null}),Q.length>cap&&Q.splice(0,Q.length-cap)},{capture:!0});const handle={drain:function(){const out=Q.slice();return Q.length=0,out}};return globalThis[K]=handle})()"##;
1195
#[cfg(feature = "chrome")]
/// JavaScript that looks up the recorder handle stored on `globalThis` under
/// `Symbol.for(<key>)` and returns the result of its `drain()` call, or an
/// empty array when no handle (or no `drain` member) is present.
pub const DRAIN_CHANGES_JS: &str = r##"(function(){const h=globalThis[Symbol.for("obs:3f2d6c07-5c01-4e2e-9ab0-8a8b9a7b2f4c")];return h&&h.drain?h.drain():[]})()"##;
1199
#[cfg(feature = "chrome")]
/// Generate a random key made of the prefix `obs:` followed by 24 characters
/// drawn from ASCII letters and digits.
fn gen_symbol_key() -> String {
    const ALPH: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
    const PREFIX: &str = "obs:";
    const RANDOM_LEN: usize = 24;

    let mut key = String::with_capacity(PREFIX.len() + RANDOM_LEN);
    key.push_str(PREFIX);
    // One random alphabet index per output character.
    key.extend((0..RANDOM_LEN).map(|_| ALPH[fastrand::usize(..ALPH.len())] as char));
    key
}
1211
#[cfg(feature = "chrome")]
/// Run the automation steps that apply to `target_url` and record, per step,
/// the page changes and a screenshot.
///
/// Before any step runs, the change recorder ([`INSTALL_OBSERVER_JS`]) is
/// installed under a freshly generated symbol key and drained once. The step
/// list is the one found by searching `automation_scripts` for `target_url`,
/// or, when the search finds nothing and `match_all` is set, the list stored
/// on the trie root.
///
/// Each step is bounded by a 60 second timeout. After every step, whether it
/// completed or timed out, the recorder is drained into `content_output` and
/// a base64-encoded full-page PNG screenshot is stored in `screenshot_output`;
/// on timeout the `error` field is also set and a warning is logged. One
/// `AutomationResults` entry per executed step is pushed onto
/// `automation_data`. A `WebAutomation::ValidateChain` step stops the chain
/// when the last completed step returned `false` (also the initial state).
pub async fn eval_automation_scripts_tracking(
    page: &chromiumoxide::Page,
    target_url: &str,
    automation_scripts: &Option<AutomationScripts>,
    automation_data: &mut Vec<crate::page::AutomationResults>,
) {
    use base64::{engine::general_purpose::STANDARD, Engine};
    use chromiumoxide::cdp::js_protocol::runtime::EvaluateParams;

    /// Evaluate params for an expression, with function-fallback enabled.
    fn eval_expr(expr: &str) -> EvaluateParams {
        EvaluateParams::builder()
            .expression(expr)
            .eval_as_function_fallback(true)
            .build()
            .expect("eval_expr build failed")
    }

    /// Evaluate params for an expression whose result is returned by value.
    fn eval_value(expr: &str) -> EvaluateParams {
        EvaluateParams::builder()
            .expression(expr)
            .return_by_value(true)
            .build()
            .expect("eval_value build failed")
    }

    /// Screenshot settings used after every step: full-page PNG.
    fn ss_params() -> chromiumoxide::page::ScreenshotParams {
        chromiumoxide::page::ScreenshotParams::builder()
            .format(chromiumoxide::cdp::browser_protocol::page::CaptureScreenshotFormat::Png)
            .full_page(true)
            .quality(45)
            .omit_background(false)
            .build()
    }

    /// Evaluate the drain script and return its JSON value, if any.
    async fn drain_changes_json(
        page: &chromiumoxide::Page,
        drain_eval: &str,
    ) -> Option<serde_json::Value> {
        page.evaluate(eval_value(drain_eval))
            .await
            .ok()
            .and_then(|eval| eval.value().cloned())
    }

    // Swap the placeholder key for a per-call random one in both scripts.
    let symbol = gen_symbol_key();
    let install = INSTALL_OBSERVER_JS.replacen(OBSERVER_SYMBOL_KEY, &symbol, 1);
    let drain = DRAIN_CHANGES_JS.replacen(OBSERVER_SYMBOL_KEY, &symbol, 1);

    // Install the recorder, then drain once; both results are ignored.
    let _ = page.evaluate(eval_expr(&install)).await;
    let _ = page.evaluate(eval_value(&drain)).await;

    // URL-specific steps win; otherwise fall back to the root list on `match_all`.
    let run_scripts: Option<&Vec<WebAutomation>> =
        automation_scripts.as_ref().and_then(|script_map| {
            script_map.search(target_url).or_else(|| {
                if script_map.match_all {
                    script_map.root.value.as_ref()
                } else {
                    None
                }
            })
        });

    let scripts = match run_scripts {
        Some(scripts) => scripts,
        None => return,
    };

    // Outcome of the most recently completed step; gates `ValidateChain`.
    let mut valid = false;

    for (idx, script) in scripts.iter().enumerate() {
        if !valid && script == &WebAutomation::ValidateChain {
            break;
        }

        let step_name = format!("step_{}_{}", idx + 1, script.label());

        let outcome =
            tokio::time::timeout(tokio::time::Duration::from_secs(60), script.run(page)).await;

        let mut record = crate::page::AutomationResults::default();
        record.input = format!("automation:{} url:{}", step_name, target_url);

        // Keep the timeout error (if any) so the warning is still logged
        // after the page state has been captured.
        let timed_out = match outcome {
            Ok(next) => {
                valid = next;
                None
            }
            Err(elapsed) => {
                record.error = Some(format!(
                    "Script execution timed out for: {} - {}",
                    target_url, elapsed
                ));
                Some(elapsed)
            }
        };

        // Capture changes and a screenshot regardless of the step outcome.
        if let Some(changes_json) = drain_changes_json(page, &drain).await {
            record.content_output = changes_json;
        }
        if let Ok(ss) = page.screenshot(ss_params()).await {
            record.screenshot_output = Some(STANDARD.encode(&ss));
        }

        if let Some(elapsed) = timed_out {
            log::warn!("Script execution timed out for: {target_url} - {elapsed}");
        }

        automation_data.push(record);
    }
}