1use crate::utils::trie::Trie;
2
3#[cfg(feature = "chrome")]
4use chromiumoxide::handler::blockers::NetworkInterceptManager;
5
/// Stand-in for the network intercept manager, compiled only when the
/// `chrome` feature is disabled.
#[cfg(not(feature = "chrome"))]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum NetworkInterceptManager {
    /// The only variant of the stand-in; it is also the default.
    #[default]
    Unknown,
}
15
16#[cfg(not(feature = "chrome"))]
17impl NetworkInterceptManager {
18 pub fn new(_url: &Option<Box<url::Url>>) -> NetworkInterceptManager {
20 NetworkInterceptManager::Unknown
21 }
22}
23
/// Configuration for waiting on an idle network.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Debug, Default, PartialEq)]
pub struct WaitForIdleNetwork {
    /// Optional upper bound for the wait; `None` leaves the limit to the consumer.
    pub timeout: Option<core::time::Duration>,
}

impl WaitForIdleNetwork {
    /// Create the configuration with the given optional timeout.
    pub fn new(timeout: Option<core::time::Duration>) -> Self {
        WaitForIdleNetwork { timeout }
    }
}
38
/// Configuration for waiting on a selector.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Debug, Default, PartialEq)]
pub struct WaitForSelector {
    /// Optional upper bound for the wait.
    pub timeout: Option<core::time::Duration>,
    /// The selector to wait for.
    pub selector: String,
}

impl WaitForSelector {
    /// Create the configuration from an optional timeout and a selector.
    pub fn new(timeout: Option<core::time::Duration>, selector: String) -> Self {
        WaitForSelector { timeout, selector }
    }
}
55
/// Configuration for waiting a fixed delay.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Debug, Default, PartialEq)]
pub struct WaitForDelay {
    /// Optional duration of the delay.
    pub timeout: Option<core::time::Duration>,
}

impl WaitForDelay {
    /// Create the configuration with the given optional duration.
    pub fn new(timeout: Option<core::time::Duration>) -> Self {
        WaitForDelay { timeout }
    }
}
70
71#[derive(Debug, Default, Clone, PartialEq)]
72#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
73pub struct WaitFor {
75 pub selector: Option<WaitForSelector>,
77 pub idle_network: Option<WaitForIdleNetwork>,
79 pub idle_network0: Option<WaitForIdleNetwork>,
81 pub almost_idle_network0: Option<WaitForIdleNetwork>,
83 pub delay: Option<WaitForDelay>,
85 pub dom: Option<WaitForSelector>,
87 #[cfg_attr(feature = "serde", serde(default))]
88 pub page_navigations: bool,
90}
91
92impl WaitFor {
93 pub fn new(
95 timeout: Option<core::time::Duration>,
96 delay: Option<WaitForDelay>,
97 page_navigations: bool,
98 idle_network: bool,
99 selector: Option<String>,
100 dom: Option<WaitForSelector>,
101 ) -> Self {
102 Self {
103 page_navigations,
104 idle_network: if idle_network {
105 Some(WaitForIdleNetwork::new(timeout))
106 } else {
107 None
108 },
109 idle_network0: None,
110 almost_idle_network0: None,
111 selector: if selector.is_some() {
112 Some(WaitForSelector::new(timeout, selector.unwrap_or_default()))
113 } else {
114 None
115 },
116 delay,
117 dom,
118 }
119 }
120}
121
122#[derive(
123 Debug, Clone, PartialEq, Eq, Hash, Default, strum::EnumString, strum::Display, strum::AsRefStr,
124)]
125#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
126pub enum CaptureScreenshotFormat {
128 #[cfg_attr(feature = "serde", serde(rename = "jpeg"))]
129 Jpeg,
131 #[cfg_attr(feature = "serde", serde(rename = "png"))]
132 #[default]
133 Png,
135 #[cfg_attr(feature = "serde", serde(rename = "webp"))]
136 Webp,
138}
139
#[cfg(feature = "chrome")]
impl From<CaptureScreenshotFormat>
    for chromiumoxide::cdp::browser_protocol::page::CaptureScreenshotFormat
{
    /// Map each local format onto the CDP variant of the same name.
    fn from(format: CaptureScreenshotFormat) -> Self {
        match format {
            CaptureScreenshotFormat::Jpeg => Self::Jpeg,
            CaptureScreenshotFormat::Png => Self::Png,
            CaptureScreenshotFormat::Webp => Self::Webp,
        }
    }
}
158
/// View port configuration for a page.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Viewport {
    /// Width of the viewport.
    pub width: u32,
    /// Height of the viewport.
    pub height: u32,
    /// Optional device scale factor.
    pub device_scale_factor: Option<f64>,
    /// Whether a mobile device is emulated.
    pub emulating_mobile: bool,
    /// Whether the viewport is in landscape orientation.
    pub is_landscape: bool,
    /// Whether touch is available.
    pub has_touch: bool,
}

impl Default for Viewport {
    /// An 800x600 viewport with no scale factor and every flag disabled.
    fn default() -> Self {
        Self {
            width: 800,
            height: 600,
            device_scale_factor: None,
            emulating_mobile: false,
            is_landscape: false,
            has_touch: false,
        }
    }
}

impl Viewport {
    /// Create a viewport of the given size; all other fields take their defaults.
    pub fn new(width: u32, height: u32) -> Self {
        Self {
            width,
            height,
            ..Self::default()
        }
    }

    /// Set the mobile emulation flag.
    pub fn set_mobile(&mut self, emulating_mobile: bool) {
        self.emulating_mobile = emulating_mobile;
    }

    /// Set the landscape flag.
    pub fn set_landscape(&mut self, is_landscape: bool) {
        self.is_landscape = is_landscape;
    }

    /// Set the touch flag.
    pub fn set_touch(&mut self, has_touch: bool) {
        self.has_touch = has_touch;
    }

    /// Set (or clear, with `None`) the device scale factor.
    pub fn set_scale_factor(&mut self, device_scale_factor: Option<f64>) {
        self.device_scale_factor = device_scale_factor;
    }
}
216
#[cfg(feature = "chrome")]
impl From<Viewport> for chromiumoxide::handler::viewport::Viewport {
    /// Copy every field across unchanged.
    fn from(viewport: Viewport) -> Self {
        let Viewport {
            width,
            height,
            device_scale_factor,
            emulating_mobile,
            is_landscape,
            has_touch,
        } = viewport;

        Self {
            width,
            height,
            device_scale_factor,
            emulating_mobile,
            is_landscape,
            has_touch,
        }
    }
}
230
231impl From<Viewport> for spider_fingerprint::spoof_viewport::Viewport {
232 fn from(viewport: Viewport) -> Self {
233 Self {
234 width: viewport.width,
235 height: viewport.height,
236 device_scale_factor: viewport.device_scale_factor,
237 emulating_mobile: viewport.emulating_mobile,
238 is_landscape: viewport.is_landscape,
239 has_touch: viewport.has_touch,
240 }
241 }
242}
243
244#[doc = "Capture page screenshot.\n[captureScreenshot](https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-captureScreenshot)"]
245#[derive(Debug, Clone, Default, PartialEq)]
246#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
247pub struct CaptureScreenshotParams {
248 #[doc = "Image compression format (defaults to png)."]
249 pub format: Option<CaptureScreenshotFormat>,
250 #[doc = "Compression quality from range [0..100] (jpeg only)."]
251 pub quality: Option<i64>,
252 #[doc = "Capture the screenshot of a given region only."]
253 pub clip: Option<ClipViewport>,
254 #[doc = "Capture the screenshot from the surface, rather than the view. Defaults to true."]
255 pub from_surface: Option<bool>,
256 #[doc = "Capture the screenshot beyond the viewport. Defaults to false."]
257 pub capture_beyond_viewport: Option<bool>,
258}
259
/// The region of the page to capture when clipping a screenshot.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Debug, PartialEq)]
pub struct ClipViewport {
    /// X offset in device independent pixels (dip).
    #[cfg_attr(feature = "serde", serde(rename = "x"))]
    pub x: f64,
    /// Y offset in device independent pixels (dip).
    #[cfg_attr(feature = "serde", serde(rename = "y"))]
    pub y: f64,
    /// Rectangle width in device independent pixels (dip).
    #[cfg_attr(feature = "serde", serde(rename = "width"))]
    pub width: f64,
    /// Rectangle height in device independent pixels (dip).
    #[cfg_attr(feature = "serde", serde(rename = "height"))]
    pub height: f64,
    /// Page scale factor.
    #[cfg_attr(feature = "serde", serde(rename = "scale"))]
    pub scale: f64,
}
280
#[cfg(feature = "chrome")]
impl From<ClipViewport> for chromiumoxide::cdp::browser_protocol::page::Viewport {
    /// Copy the clip rectangle and scale across unchanged.
    fn from(viewport: ClipViewport) -> Self {
        let ClipViewport {
            x,
            y,
            width,
            height,
            scale,
        } = viewport;

        Self {
            x,
            y,
            width,
            height,
            scale,
        }
    }
}
293
294#[derive(Debug, Default, Clone, PartialEq)]
296#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
297pub struct ScreenShotConfig {
298 pub params: ScreenshotParams,
300 pub bytes: bool,
302 pub save: bool,
304 pub output_dir: Option<std::path::PathBuf>,
306}
307
308impl ScreenShotConfig {
309 pub fn new(
311 params: ScreenshotParams,
312 bytes: bool,
313 save: bool,
314 output_dir: Option<std::path::PathBuf>,
315 ) -> Self {
316 Self {
317 params,
318 bytes,
319 save,
320 output_dir,
321 }
322 }
323}
324
325#[derive(Default, Debug, Clone, PartialEq)]
327#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
328pub struct ScreenshotParams {
329 pub cdp_params: CaptureScreenshotParams,
331 pub full_page: Option<bool>,
333 pub omit_background: Option<bool>,
335}
336
337impl ScreenshotParams {
338 pub fn new(
340 cdp_params: CaptureScreenshotParams,
341 full_page: Option<bool>,
342 omit_background: Option<bool>,
343 ) -> Self {
344 Self {
345 cdp_params,
346 full_page,
347 omit_background,
348 }
349 }
350}
351
#[cfg(feature = "chrome")]
impl From<ScreenshotParams> for chromiumoxide::page::ScreenshotParams {
    /// Build the chromiumoxide screenshot parameters.
    ///
    /// Fallbacks when a value is not set explicitly:
    /// * `full_page` - `true` only when the `SCREENSHOT_FULL_PAGE` env var equals `"true"`.
    /// * `omit_background` - the `SCREENSHOT_OMIT_BACKGROUND` env var compared to `"true"`,
    ///   or `true` when that variable cannot be read.
    /// * `format` - PNG.
    ///
    /// `quality`, `clip`, `capture_beyond_viewport` and `from_surface` are only passed to the
    /// builder when they are `Some`.
    fn from(params: ScreenshotParams) -> Self {
        use chromiumoxide::cdp::browser_protocol::page::{
            CaptureScreenshotFormat as CdpFormat, Viewport as CdpViewport,
        };

        let ScreenshotParams {
            cdp_params,
            full_page,
            omit_background,
        } = params;

        let full_page = full_page.unwrap_or_else(|| {
            std::env::var("SCREENSHOT_FULL_PAGE").unwrap_or_default() == "true"
        });
        let omit_background = omit_background.unwrap_or_else(|| {
            std::env::var("SCREENSHOT_OMIT_BACKGROUND").map_or(true, |flag| flag == "true")
        });
        let format = cdp_params.format.map_or(CdpFormat::Png, Into::into);

        let mut builder = chromiumoxide::page::ScreenshotParams::builder()
            .format(format)
            .full_page(full_page)
            .omit_background(omit_background);

        if let Some(quality) = cdp_params.quality {
            builder = builder.quality(quality);
        }
        if let Some(clip) = cdp_params.clip {
            builder = builder.clip(CdpViewport::from(clip));
        }
        if let Some(capture_beyond_viewport) = cdp_params.capture_beyond_viewport {
            builder = builder.capture_beyond_viewport(capture_beyond_viewport);
        }
        if let Some(from_surface) = cdp_params.from_surface {
            builder = builder.from_surface(from_surface);
        }

        builder.build()
    }
}
422
/// The decision on what to do in response to the authorization challenge.
///
/// `Default` means deferring to the default behavior of the net stack, which will likely
/// either cancel authentication or display a popup dialog box.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
pub enum AuthChallengeResponseResponse {
    /// Defer to the default behavior of the net stack.
    #[default]
    Default,
    /// Cancel the authentication.
    CancelAuth,
    /// Answer the challenge with the supplied credentials.
    ProvideCredentials,
}
435
436#[doc = "Response to an AuthChallenge.\n[AuthChallengeResponse](https://chromedevtools.github.io/devtools-protocol/tot/Fetch/#type-AuthChallengeResponse)"]
437#[derive(Default, Debug, Clone, PartialEq)]
438#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
439pub struct AuthChallengeResponse {
440 #[doc = "The decision on what to do in response to the authorization challenge. Default means\ndeferring to the default behavior of the net stack, which will likely either the Cancel\nauthentication or display a popup dialog box."]
441 pub response: AuthChallengeResponseResponse,
442 #[doc = "The username to provide, possibly empty. Should only be set if response is\nProvideCredentials."]
443 pub username: Option<String>,
444 #[doc = "The password to provide, possibly empty. Should only be set if response is\nProvideCredentials."]
445 pub password: Option<String>,
446}
447
#[cfg(feature = "chrome")]
impl From<AuthChallengeResponse>
    for chromiumoxide::cdp::browser_protocol::fetch::AuthChallengeResponse
{
    /// Map the decision onto the CDP variant of the same name and move the credentials across.
    fn from(auth_challenge_response: AuthChallengeResponse) -> Self {
        use chromiumoxide::cdp::browser_protocol::fetch::AuthChallengeResponseResponse as CdpDecision;

        let AuthChallengeResponse {
            response,
            username,
            password,
        } = auth_challenge_response;

        let response = match response {
            AuthChallengeResponseResponse::Default => CdpDecision::Default,
            AuthChallengeResponseResponse::CancelAuth => CdpDecision::CancelAuth,
            AuthChallengeResponseResponse::ProvideCredentials => CdpDecision::ProvideCredentials,
        };

        Self {
            response,
            username,
            password,
        }
    }
}
465
/// A single web automation step that can be run against a page.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Debug, PartialEq)]
pub enum WebAutomation {
    /// Evaluate the given JavaScript on the page.
    Evaluate(String),
    /// Click the first element matching the selector.
    Click(String),
    /// Click every element matching the selector.
    ClickAll(String),
    /// Click at a point.
    ClickPoint {
        /// The horizontal coordinate.
        x: f64,
        /// The vertical coordinate.
        y: f64,
    },
    /// Click an element and hold.
    ClickHold {
        /// The selector of the element.
        selector: String,
        /// How long to hold, in milliseconds.
        hold_ms: u64,
    },
    /// Click at a point and hold.
    ClickHoldPoint {
        /// The horizontal coordinate.
        x: f64,
        /// The vertical coordinate.
        y: f64,
        /// How long to hold, in milliseconds.
        hold_ms: u64,
    },
    /// Click and drag from one element to another.
    ClickDrag {
        /// The selector of the element to drag from.
        from: String,
        /// The selector of the element to drag to.
        to: String,
        /// Optional modifier forwarded with the drag.
        modifier: Option<i64>,
    },

    /// Click and drag from one point to another.
    ClickDragPoint {
        /// The horizontal start coordinate.
        from_x: f64,
        /// The vertical start coordinate.
        from_y: f64,
        /// The horizontal end coordinate.
        to_x: f64,
        /// The vertical end coordinate.
        to_y: f64,
        /// Optional modifier forwarded with the drag.
        modifier: Option<i64>,
    },
    /// Click every element considered clickable.
    ClickAllClickable(),
    /// Wait for a fixed time in milliseconds.
    Wait(u64),
    /// Wait for the page navigation to finish.
    WaitForNavigation,
    /// Wait for the dom under a selector to settle.
    WaitForDom {
        /// The selector to watch; `None` watches `body`.
        selector: Option<String>,
        /// The timeout in milliseconds.
        timeout: u32,
    },
    /// Wait for an element matching the selector.
    WaitFor(String),
    /// Wait for an element matching the selector, with an explicit timeout.
    WaitForWithTimeout {
        /// The selector to wait for.
        selector: String,
        /// The timeout in milliseconds.
        timeout: u64,
    },
    /// Wait for an element matching the selector, then click it.
    WaitForAndClick(String),
    /// Scroll horizontally by the given amount.
    ScrollX(i32),
    /// Scroll vertically by the given amount.
    ScrollY(i32),
    /// Click an element and type a value into it.
    Fill {
        /// The selector of the element to fill.
        selector: String,
        /// The text to type.
        value: String,
    },
    /// Type a value on the page.
    Type {
        /// The text to type.
        value: String,
        /// Optional modifier forwarded with the typing.
        modifier: Option<i64>,
    },
    /// Scroll the page until no new content appears or the given limit is reached.
    InfiniteScroll(u32),
    /// Take a PNG screenshot of the page and save it.
    Screenshot {
        /// Capture the full page.
        full_page: bool,
        /// Omit the background.
        omit_background: bool,
        /// The path the screenshot is saved to.
        output: String,
    },
    /// Stop the remaining steps of a chain when the previous step did not succeed.
    ValidateChain,
}
578
579impl WebAutomation {
580 pub fn name(&self) -> &'static str {
582 use WebAutomation::*;
583 match self {
584 Evaluate(_) => "Evaluate",
585 Click(_) => "Click",
586 ClickDrag { .. } => "ClickDrag",
587 ClickDragPoint { .. } => "ClickDragPoint",
588 ClickHold { .. } => "ClickHold",
589 ClickHoldPoint { .. } => "ClickHoldPoint",
590 ClickAll(_) => "ClickAll",
591 ClickAllClickable() => "ClickAllClickable",
592 ClickPoint { .. } => "ClickPoint",
593 Wait(_) => "Wait",
594 WaitForNavigation => "WaitForNavigation",
595 WaitForDom { .. } => "WaitForDom",
596 WaitFor(_) => "WaitFor",
597 WaitForWithTimeout { .. } => "WaitForWithTimeout",
598 WaitForAndClick(_) => "WaitForAndClick",
599 ScrollX(_) => "ScrollX",
600 ScrollY(_) => "ScrollY",
601 Fill { .. } => "Fill",
602 Type { .. } => "Type",
603 InfiniteScroll(_) => "InfiniteScroll",
604 Screenshot { .. } => "Screenshot",
605 ValidateChain => "ValidateChain",
606 }
607 }
608
609 pub fn label(&self) -> String {
611 use WebAutomation::*;
612 match self {
613 Evaluate(_) => "Evaluate JS".into(),
614 Click(s) => format!("Click {}", s),
615 ClickHold { selector, hold_ms } => {
616 format!("ClickHold {} ({}ms)", selector, hold_ms)
617 }
618 ClickHoldPoint { x, y, hold_ms } => {
619 format!("ClickHoldPoint x:{} y:{} ({}ms)", x, y, hold_ms)
620 }
621 ClickDrag { from, to, modifier } => {
622 format!("ClickDrag {} -> {} modifier={:?}", from, to, modifier)
623 }
624 ClickDragPoint {
625 from_x,
626 from_y,
627 to_x,
628 to_y,
629 modifier,
630 } => format!(
631 "ClickDragPoint ({},{}) -> ({},{}) modifier={:?}",
632 from_x, from_y, to_x, to_y, modifier
633 ),
634 ClickAll(s) => format!("ClickAll {}", s),
635 ClickAllClickable() => "ClickAllClickable".into(),
636 Wait(ms) => format!("Wait {}ms", ms),
637 WaitForNavigation => "WaitForNavigation".into(),
638 ClickPoint { x, y } => {
639 format!("ClickPoint x:{} y:{}", x, y)
640 }
641 WaitForDom { selector, timeout } => selector
642 .as_ref()
643 .map(|s| format!("WaitForDom {} ({}ms)", s, timeout))
644 .unwrap_or_else(|| format!("WaitForDom ({}ms)", timeout)),
645 WaitFor(s) => format!("WaitFor {}", s),
646 WaitForWithTimeout { selector, timeout } => {
647 format!("WaitForWithTimeout {} ({}ms)", selector, timeout)
648 }
649 WaitForAndClick(s) => format!("WaitForAndClick {}", s),
650 ScrollX(dx) => format!("ScrollX {}", dx),
651 ScrollY(dy) => format!("ScrollY {}", dy),
652 Fill { selector, .. } => format!("Fill {}", selector),
653 Type { value, modifier } => format!("Type {} modifier={:?}", value, modifier),
654 InfiniteScroll(n) => format!("InfiniteScroll {}", n),
655 Screenshot {
656 full_page,
657 omit_background,
658 output,
659 } => format!(
660 "Screenshot full={} omit_bg={} -> {}",
661 full_page, omit_background, output
662 ),
663 ValidateChain => "ValidateChain".into(),
664 }
665 }
666}
667
668impl core::fmt::Display for WebAutomation {
669 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
670 f.write_str(&self.label())
671 }
672}
673
/// Generate the JavaScript that waits for the dom under `selector` to become stable.
///
/// The script is an immediately invoked function returning a promise that resolves to
/// `true` once the target element has seen no mutations for the quiet period on the
/// required number of consecutive animation frames (and, when requested, is visible and
/// has all of its images complete), or to `false` when the timeout passes first.
/// When the selector matches nothing the script falls back to a fixed list of common
/// root elements.
///
/// * `timeout_ms` - overall limit, capped at `crate::utils::FIVE_MINUTES`.
/// * `selector` - the element to watch; an empty selector means `body`.
/// * `quiet_ms` - mutation-free period, raised to at least 50 and capped at the timeout.
/// * `stable_frames` - consecutive stable frames required, kept within 1 to 10.
/// * `require_visible` - also require the element to be visible.
/// * `require_images_complete` - also require the images inside the element to be complete.
#[cfg(feature = "chrome")]
pub(crate) fn generate_wait_for_dom_js_v2(
    timeout_ms: u32,
    selector: &str,
    quiet_ms: u32,
    stable_frames: u32,
    require_visible: bool,
    require_images_complete: bool,
) -> String {
    let limit = timeout_ms.min(crate::utils::FIVE_MINUTES);
    // Not `clamp`: the capped timeout may be below 50, in which case the timeout wins.
    let quiet = quiet_ms.max(50).min(limit);
    let frames = stable_frames.clamp(1, 10);
    let css = match selector {
        "" => "body",
        other => other,
    };

    // `{s:?}` renders the selector as a quoted and escaped string literal.
    format!(
        r###"(()=>new Promise(R=>{{const S={s:?},T={t},Q={q},F={f},V={vis},I={img},P=["#__next","#__nuxt","#app","#root","main","body"],N=()=>performance.now(),W=e=>{{if(!e)return!1;const t=getComputedStyle(e);if("none"===t.display||"hidden"===t.visibility||"0"===t.opacity)return!1;const r=e.getBoundingClientRect();return r.width>0&&r.height>0}},M=e=>{{if(!e)return!1;const t=e.querySelectorAll("img");for(let e=0;e<t.length;e++){{const r=t[e];if(!r.complete)return!1;if(0===r.naturalWidth&&0===r.naturalHeight&&r.currentSrc)return!1}}return!0}},k=()=>{{let e=document.querySelector(S);if(e)return e;for(let t=0;t<P.length;t++){{if(e=document.querySelector(P[t]))return e}}return null}},s=N();let e=null,t=null,r=s,o=0;const a=n=>{{t&&t.disconnect(),t=new MutationObserver(()=>{{r=N(),o=0}}),t.observe(n,{{subtree:!0,childList:!0,attributes:!0,characterData:!0}})}},i=()=>{{const n=N();if(n-s>=T)return t&&t.disconnect(),void R(!1);(!e||!document.contains(e))&&(e=k())&&((r=n,o=0),a(e));e&&(V&&!W(e)?o=0:I&&!M(e)?o=0:n-r>=Q?(o++,o>=F&&(t&&t.disconnect(),R(!0))):o=0),requestAnimationFrame(i)}};i()}}))()"###,
        s = css,
        t = limit,
        q = quiet,
        f = frames,
        vis = require_visible,
        img = require_images_complete,
    )
}
706
/// Generate the JavaScript that waits for the dom under a selector to stop changing.
///
/// The script polls every 50ms until the selector matches, then observes the element and
/// resolves to `true` once 220ms pass without a mutation. It rejects with `Dom Timeout.`
/// when `timeout` milliseconds pass after the element was found.
///
/// * `timeout` - the limit in milliseconds, capped at `crate::utils::FIVE_MINUTES`.
/// * `selector` - the element to watch; `None` or an empty selector means `body`.
///
/// The selector is embedded as a quoted and escaped string literal, so selectors that
/// contain quotes (for example `input[type='text']`) or backslashes no longer break the
/// generated script.
// NOTE(review): the timeout timer is only armed after the element is found, so a selector
// that never matches leaves the promise pending; callers should enforce their own limit.
#[cfg(feature = "chrome")]
pub(crate) fn generate_wait_for_dom_js_code_with_selector(
    timeout: u32,
    selector: Option<&str>,
) -> String {
    let t = timeout.min(crate::utils::FIVE_MINUTES);
    // `querySelector("")` throws, so treat an empty selector like a missing one.
    let s = selector.filter(|css| !css.is_empty()).unwrap_or("body");
    format!(
        "new Promise((r,j)=>{{const s={s:?},t={t},i=220,n=50;let l=Date.now(),el,o,d,c;const check=()=>{{el=document.querySelector(s);if(!el)return;clearInterval(wait);l=Date.now();o=new MutationObserver(()=>{{l=Date.now();}});o.observe(el,{{childList:!0,subtree:!0,attributes:!0,characterData:!0}});d=setTimeout(()=>{{clearInterval(c),o.disconnect(),j(new Error('Dom Timeout.'))}},t);c=setInterval(()=>{{Date.now()-l>=i&&(clearTimeout(d),clearInterval(c),o.disconnect(),r(!0))}},n);}};const wait=setInterval(check,n);check();}});",
        s = s,
        t = t
    )
}
721
/// The selector list used to find every clickable element on a page.
///
/// It covers enabled buttons and button-like inputs, checkboxes and radios, labels bound
/// with `for`, `summary`, enabled `select` and `textarea`, `role='button'` elements that
/// are not aria-disabled, elements with `onclick`, editable content, and elements with a
/// non-negative `tabindex`.
// NOTE(review): `a[href]` sits inside the final `:not(...)`, so links are excluded rather
// than matched - confirm this is intended.
#[cfg(feature = "chrome")]
const CLICKABLE_SELECTOR: &str = "button:not([disabled]),\
    input[type='button']:not([disabled]),\
    input[type='submit']:not([disabled]),\
    input[type='reset']:not([disabled]),\
    input[type='checkbox']:not([disabled]),\
    input[type='radio']:not([disabled]),\
    label[for],\
    summary,\
    select:not([disabled]),\
    textarea:not([disabled]),\
    [role='button']:not([aria-disabled='true']),\
    [onclick],\
    [contenteditable=''],[contenteditable='true'],\
    [tabindex]:not([tabindex^='-'],a[href])";
756
757impl WebAutomation {
758 #[cfg(feature = "chrome")]
759 pub async fn run(&self, page: &chromiumoxide::Page) -> bool {
761 use crate::utils::wait_for_selector;
762 use std::time::Duration;
763
764 let mut valid = false;
765
766 match self {
767 WebAutomation::Evaluate(js) => {
768 valid = page.evaluate(js.as_str()).await.is_ok();
769 }
770 WebAutomation::Click(selector) => {
771 if let Ok(ele) = page.find_element(selector).await {
772 valid = ele.click().await.is_ok();
773 }
774 }
775
776 WebAutomation::WaitForWithTimeout { selector, timeout } => {
777 valid =
778 wait_for_selector(page, Some(Duration::from_millis(*timeout)), selector).await;
779 }
780 WebAutomation::Wait(ms) => {
781 tokio::time::sleep(Duration::from_millis(*ms)).await;
782 valid = true;
783 }
784 WebAutomation::ClickHold { selector, hold_ms } => {
785 if let Ok(ele) = page.find_element(selector).await {
786 if let Ok(pt) = ele.clickable_point().await {
787 valid = page
788 .click_and_hold(pt, Duration::from_millis(*hold_ms))
789 .await
790 .is_ok();
791 }
792 }
793 }
794 WebAutomation::ClickHoldPoint { x, y, hold_ms } => {
795 let pt = chromiumoxide::layout::Point { x: *x, y: *y };
796 valid = page
797 .click_and_hold(pt, Duration::from_millis(*hold_ms))
798 .await
799 .is_ok();
800 }
801 WebAutomation::ClickAll(selector) => {
802 if let Ok(eles) = page.find_elements(selector).await {
803 for ele in eles {
804 valid = ele.click().await.is_ok();
805 }
806 }
807 }
808 WebAutomation::ClickDrag { from, to, modifier } => {
809 if let (Ok(from_el), Ok(to_el)) =
810 (page.find_element(from).await, page.find_element(to).await)
811 {
812 if let (Ok(p1), Ok(p2)) = (
813 from_el.clickable_point().await,
814 to_el.clickable_point().await,
815 ) {
816 valid = match modifier {
817 Some(m) => page.click_and_drag_with_modifier(p1, p2, *m).await.is_ok(),
818 None => page.click_and_drag(p1, p2).await.is_ok(),
819 };
820 }
821 }
822 }
823
824 WebAutomation::ClickDragPoint {
825 from_x,
826 from_y,
827 to_x,
828 to_y,
829 modifier,
830 } => {
831 let p1 = chromiumoxide::layout::Point {
832 x: *from_x,
833 y: *from_y,
834 };
835 let p2 = chromiumoxide::layout::Point { x: *to_x, y: *to_y };
836
837 valid = match modifier {
838 Some(m) => page.click_and_drag_with_modifier(p1, p2, *m).await.is_ok(),
839 None => page.click_and_drag(p1, p2).await.is_ok(),
840 };
841 }
842 WebAutomation::ClickAllClickable() => {
843 if let Ok(eles) = page.find_elements(CLICKABLE_SELECTOR).await {
844 for ele in eles {
845 valid = ele.click().await.is_ok();
846 }
847 }
848 }
849 WebAutomation::ClickPoint { x, y } => {
850 valid = page
851 .click(chromiumoxide::layout::Point { x: *x, y: *y })
852 .await
853 .is_ok();
854 }
855 WebAutomation::WaitForDom { selector, timeout } => {
856 valid = page
857 .evaluate(
858 generate_wait_for_dom_js_code_with_selector(*timeout, selector.as_deref())
859 .as_str(),
860 )
861 .await
862 .is_ok();
863 }
864 WebAutomation::WaitFor(selector) => {
865 valid = wait_for_selector(page, Some(Duration::from_secs(60)), selector).await;
866 }
867 WebAutomation::WaitForNavigation => {
868 valid = page.wait_for_navigation().await.is_ok();
869 }
870 WebAutomation::WaitForAndClick(selector) => {
871 valid = wait_for_selector(page, Some(Duration::from_secs(60)), selector).await;
872 if let Ok(ele) = page.find_element(selector).await {
873 valid = ele.click().await.is_ok();
874 }
875 }
876 WebAutomation::ScrollX(px) => {
877 valid = page
878 .scroll_by((*px as f32).into(), 0.0, Default::default())
879 .await
880 .is_ok()
881 }
882 WebAutomation::ScrollY(px) => {
883 valid = page
884 .scroll_by(0.0, (*px as f32).into(), Default::default())
885 .await
886 .is_ok()
887 }
888 WebAutomation::Fill { selector, value } => {
889 if let Ok(ele) = page.find_element(selector).await {
890 if let Ok(el) = ele.click().await {
891 valid = el.type_str(value).await.is_ok();
892 }
893 }
894 }
895 WebAutomation::Type { value, modifier } => {
896 valid = page.type_str_with_modifier(value, *modifier).await.is_ok()
897 }
898 WebAutomation::InfiniteScroll(duration) => {
899 valid = page.evaluate(set_dynamic_scroll(*duration)).await.is_ok();
900 }
901 WebAutomation::Screenshot {
902 full_page,
903 omit_background,
904 output,
905 } => {
906 let mut cdp_params: CaptureScreenshotParams = CaptureScreenshotParams::default();
907 cdp_params.format = Some(CaptureScreenshotFormat::Png);
908
909 let screenshot_params =
910 ScreenshotParams::new(cdp_params, Some(*full_page), Some(*omit_background));
911
912 valid = page
913 .save_screenshot(screenshot_params, output)
914 .await
915 .is_ok();
916 }
917 _ => (),
918 };
919
920 valid
921 }
922}
923
924pub fn set_dynamic_scroll(timeout: u32) -> String {
926 let timeout = timeout.min(crate::utils::FIVE_MINUTES);
927 let s = string_concat!(
928 r###"document.addEventListener('DOMContentLoaded',e=>{let t=null,o=null,n="###,
929 timeout.to_string(),
930 r###",a=Date.now(),i=Date.now(),r=()=>{window.scrollTo(0,document.body.scrollHeight)},l=()=>{o&&o.disconnect(),console.log('Stopped checking for new content.')},c=(e,n)=>{e.forEach(e=>{if(e.isIntersecting){i=Date.now();const n=Date.now();if(n-a>=t||n-i>=1e4)return void l();r(),t=document.querySelector('body > *:last-child'),o.observe(t)}})},s=()=>{t&&(o=new IntersectionObserver(c),o.observe(t))},d=()=>{['load','error','abort'].forEach(e=>{window.addEventListener(e,()=>{const e=document.querySelector('body > *:last-child');e!==t&&(i=Date.now(),t=e,o.observe(t))})})},u=()=>{r(),t=document.querySelector('body > *:last-child'),s(),d()};u(),setTimeout(l,n)});"###
931 );
932
933 s
934}
935
936pub type ExecutionScriptsMap = hashbrown::HashMap<String, String>;
938pub type AutomationScriptsMap = hashbrown::HashMap<String, Vec<WebAutomation>>;
940
941pub type ExecutionScripts = Trie<String>;
943pub type AutomationScripts = Trie<Vec<WebAutomation>>;
945
946#[derive(Debug, Clone, Default, PartialEq)]
947#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
948pub struct RequestInterceptConfiguration {
950 pub enabled: bool,
952 pub block_visuals: bool,
954 pub block_stylesheets: bool,
956 pub block_javascript: bool,
958 pub block_analytics: bool,
960 pub block_ads: bool,
962 pub intercept_manager: NetworkInterceptManager,
964 pub whitelist_patterns: Option<Vec<String>>,
966 pub blacklist_patterns: Option<Vec<String>>,
968}
969
970impl RequestInterceptConfiguration {
971 pub fn new(enabled: bool) -> RequestInterceptConfiguration {
973 RequestInterceptConfiguration {
974 enabled,
975 block_javascript: false,
976 block_visuals: true,
977 block_analytics: true,
978 block_stylesheets: true,
979 block_ads: true,
980 ..Default::default()
981 }
982 }
983 pub fn new_manager(
985 enabled: bool,
986 url: &Option<Box<url::Url>>,
987 ) -> RequestInterceptConfiguration {
988 RequestInterceptConfiguration {
989 enabled,
990 block_javascript: false,
991 block_visuals: true,
992 block_analytics: true,
993 block_stylesheets: true,
994 intercept_manager: NetworkInterceptManager::new(url),
995 ..Default::default()
996 }
997 }
998
999 pub fn setup_intercept_manager(&mut self, url: &Option<Box<url::Url>>) {
1001 self.intercept_manager = NetworkInterceptManager::new(url);
1002 }
1003
1004 pub fn set_whitelist_patterns(&mut self, whitelist_patterns: Option<Vec<String>>) {
1006 self.whitelist_patterns = whitelist_patterns;
1007 }
1008
1009 pub fn set_blacklist_patterns(&mut self, blacklist_patterns: Option<Vec<String>>) {
1011 self.blacklist_patterns = blacklist_patterns;
1012 }
1013
1014 pub fn block_all(&mut self) -> &Self {
1016 self.block_javascript = true;
1017 self.block_analytics = true;
1018 self.block_stylesheets = true;
1019 self.block_visuals = true;
1020 self.block_ads = true;
1021 self
1022 }
1023}
1024
1025pub fn convert_to_trie_execution_scripts(
1027 input: &Option<ExecutionScriptsMap>,
1028) -> Option<Trie<String>> {
1029 match input {
1030 Some(ref scripts) => {
1031 let mut trie = Trie::new();
1032 for (path, script) in scripts {
1033 trie.insert(path, script.clone());
1034 }
1035 Some(trie)
1036 }
1037 None => None,
1038 }
1039}
1040
1041pub fn convert_to_trie_automation_scripts(
1043 input: &Option<AutomationScriptsMap>,
1044) -> Option<Trie<Vec<WebAutomation>>> {
1045 match input {
1046 Some(ref scripts) => {
1047 let mut trie = Trie::new();
1048 for (path, script_list) in scripts {
1049 trie.insert(path, script_list.clone());
1050 }
1051 Some(trie)
1052 }
1053 None => None,
1054 }
1055}
1056
/// Evaluate the execution script registered for `target_url` on the page.
///
/// The script found by searching the trie for `target_url` is used when there is one.
/// Otherwise, when the trie is flagged `match_all`, the script stored on the trie root is
/// used if present. The evaluation result is discarded.
#[cfg(feature = "chrome")]
pub async fn eval_execution_scripts(
    page: &chromiumoxide::Page,
    target_url: &str,
    execution_scripts: &Option<ExecutionScripts>,
) {
    let scripts = match execution_scripts {
        Some(scripts) => scripts,
        None => return,
    };

    match scripts.search(target_url) {
        Some(script) => {
            let _ = page.evaluate(script.as_str()).await;
        }
        None if scripts.match_all => {
            if let Some(script) = scripts.root.value.as_ref() {
                let _ = page.evaluate(script.as_str()).await;
            }
        }
        None => {}
    }
}
1074
/// Run the automation steps registered for `target_url` on the page.
///
/// The steps found by searching the trie for `target_url` are used when there are any.
/// Otherwise, when the trie is flagged `match_all`, the steps stored on the trie root are
/// used if present.
///
/// Steps run in order, each limited to 60 seconds. A `ValidateChain` step stops the chain
/// when the step before it did not succeed. A step that times out is logged and counts as
/// failed, so a following `ValidateChain` stops the chain instead of reusing the outcome of
/// an earlier step.
#[cfg(feature = "chrome")]
pub async fn eval_automation_scripts(
    page: &chromiumoxide::Page,
    target_url: &str,
    automation_scripts: &Option<AutomationScripts>,
) {
    /// Run one chain of steps in order, honoring `ValidateChain` and the per-step limit.
    async fn run_chain(page: &chromiumoxide::Page, target_url: &str, chain: &[WebAutomation]) {
        let mut valid = false;

        for script in chain {
            if matches!(script, WebAutomation::ValidateChain) && !valid {
                break;
            }
            match tokio::time::timeout(tokio::time::Duration::from_secs(60), script.run(page))
                .await
            {
                Ok(next) => valid = next,
                Err(elapsed) => {
                    // A step that never finished cannot be treated as successful.
                    valid = false;
                    log::warn!("Script execution timed out for: {target_url} - {elapsed}")
                }
            }
        }
    }

    if let Some(script_map) = automation_scripts {
        if let Some(scripts) = script_map.search(target_url) {
            run_chain(page, target_url, scripts).await;
        } else if script_map.match_all {
            if let Some(scripts) = script_map.root.value.as_ref() {
                run_chain(page, target_url, scripts).await;
            }
        }
    }
}
1123
/// The `Symbol.for` key under which the page observer handle is stored on `globalThis`.
///
/// The same key is written literally inside the observer install and drain scripts.
#[cfg(feature = "chrome")]
pub const OBSERVER_SYMBOL_KEY: &str = "obs:3f2d6c07-5c01-4e2e-9ab0-8a8b9a7b2f4c";
/// JavaScript that installs the page observer, once per page.
///
/// The script returns the existing handle when one is already stored on `globalThis`.
/// Otherwise it records dom mutations, location changes, input/change events and clicks
/// into a queue capped at 100 entries, and stores a handle whose `drain()` returns the
/// queued entries and empties the queue.
#[cfg(feature = "chrome")]
pub const INSTALL_OBSERVER_JS: &str = r##"(function(){const K=Symbol.for("obs:3f2d6c07-5c01-4e2e-9ab0-8a8b9a7b2f4c");if(globalThis[K])return globalThis[K];const Q=[],cap=100,cssPath=e=>{if(!e||1!==e.nodeType)return "#text";const t=[];for(;e&&1===e.nodeType&&e!==document.documentElement;){let n=e.nodeName.toLowerCase();if(e.id){n+="#"+e.id,t.unshift(n);break}let r=e,o=1;for(;(r=r.previousElementSibling)!=null;)if(r.nodeName===e.nodeName)o++;t.unshift(n+":nth-of-type("+o+")"),e=e.parentElement}return t.length?t.join(">"):"html"},state=()=>({url:location.href,title:document.title,viewport:{w:innerWidth,h:innerHeight,dpr:devicePixelRatio||1},scroll:{x:pageXOffset||0,y:pageYOffset||0}}),serialize=m=>"childList"===m.type?{type:"childList",target:cssPath(m.target),added:Array.from(m.addedNodes||[]).slice(0,3).map(n=>1===n.nodeType?n.outerHTML:"#text"),removed:Array.from(m.removedNodes||[]).slice(0,3).map(n=>1===n.nodeType?n.outerHTML:"#text" )}:"attributes"===m.type?{type:"attributes",target:cssPath(m.target),attribute:m.attributeName||"",old:m.oldValue??null,new:m.attributeName?m.target.getAttribute(m.attributeName):null}:{type:m.type,target:cssPath(m.target)},push=e=>{Q.push({t:Date.now(),state:state(),mutations:e?e.map(serialize):[]}),Q.length>cap&&Q.splice(0,Q.length-cap)};new MutationObserver(l=>push(Array.from(l))).observe(document.documentElement,{subtree:!0,childList:!0,attributes:!0,characterData:!1,attributeOldValue:!0});const notify=()=>push(null);["pushState","replaceState"].forEach(k=>{const o=history[k];history[k]=function(){const r=o.apply(this,arguments);return dispatchEvent(new Event("locationchange")),r}});addEventListener("popstate",()=>dispatchEvent(new Event("locationchange")));addEventListener("hashchange",()=>dispatchEvent(new Event("locationchange")));addEventListener("locationchange",notify,{passive:!0});const emitVal=el=>{const 
now=el&&null!=el.value?""+el.value:null,old=Object.prototype.hasOwnProperty.call(el,"__oldVal")?el.__oldVal:null;el.__oldVal=now,Q.push({t:Date.now(),state:state(),action:"input",target:cssPath(el),old:old,new:now}),Q.length>cap&&Q.splice(0,Q.length-cap)};addEventListener("input",e=>{const t=e.target;if(t&&("INPUT"===t.tagName||"TEXTAREA"===t.tagName))emitVal(t)},{capture:!0,passive:!0});addEventListener("change",e=>{const t=e.target;if(t&&("INPUT"===t.tagName||"TEXTAREA"===t.tagName))emitVal(t)},{capture:!0});addEventListener("click",e=>{const el=e.target instanceof Element?e.target:null;Q.push({t:Date.now(),state:state(),action:"click",target:el?cssPath(el):null,meta:el?{tag:el.tagName,id:el.id||null,class:el.className||null,href:el.getAttribute("href")||null,text:(el.textContent||"").trim().slice(0,120)}:null}),Q.length>cap&&Q.splice(0,Q.length-cap)},{capture:!0});const handle={drain:function(){const out=Q.slice();return Q.length=0,out}};return globalThis[K]=handle})()"##;
1130
#[cfg(feature = "chrome")]
/// JavaScript that empties the recorder queue created by `INSTALL_OBSERVER_JS`.
///
/// It looks up the handle stored on `globalThis` under `Symbol.for("<key>")` and returns the
/// result of its `drain()` method, or an empty array when no handle with a `drain` method is
/// present. The embedded key is the `OBSERVER_SYMBOL_KEY` placeholder.
pub const DRAIN_CHANGES_JS: &str = r##"(function(){const h=globalThis[Symbol.for("obs:3f2d6c07-5c01-4e2e-9ab0-8a8b9a7b2f4c")];return h&&h.drain?h.drain():[]})()"##;
1134
#[cfg(feature = "chrome")]
/// Build a random observer key of the form `obs:` followed by 24 ASCII alphanumeric characters.
///
/// The characters are drawn one at a time with `fastrand::usize` from the 62-symbol
/// alphabet `A-Z`, `a-z`, `0-9`.
fn gen_symbol_key() -> String {
    const ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
    const PREFIX: &str = "obs:";
    const RANDOM_LEN: usize = 24;

    let mut key = String::with_capacity(PREFIX.len() + RANDOM_LEN);
    key.push_str(PREFIX);
    key.extend(
        (0..RANDOM_LEN).map(|_| char::from(ALPHABET[fastrand::usize(..ALPHABET.len())])),
    );
    key
}
1146
#[cfg(feature = "chrome")]
/// Run the automation scripts that apply to `target_url` and record the outcome of each step.
///
/// Before any step runs, `INSTALL_OBSERVER_JS` is evaluated on `page` with the
/// `OBSERVER_SYMBOL_KEY` placeholder replaced by a freshly generated key, and the recorder
/// queue is drained once. Both evaluations are best effort: their results are ignored.
///
/// The scripts are taken from `automation_scripts.search(target_url)`; when nothing matches
/// and `match_all` is set, the value stored on the root node is used instead. If no scripts
/// are selected the function returns after installing the observer.
///
/// For every step, in order:
/// - a `WebAutomation::ValidateChain` step stops the remaining steps unless the most recently
///   completed step returned `true`;
/// - the step runs with a 60 second timeout;
/// - an `AutomationResults` entry is pushed onto `automation_data`, holding the step label,
///   the drained recorder entries (when the drain succeeds) and a base64-encoded full-page
///   PNG screenshot (when the capture succeeds);
/// - when the step times out, the entry's `error` is set and a warning is logged.
pub async fn eval_automation_scripts_tracking(
    page: &chromiumoxide::Page,
    target_url: &str,
    automation_scripts: &Option<AutomationScripts>,
    automation_data: &mut Vec<crate::page::AutomationResults>,
) {
    use base64::{engine::general_purpose::STANDARD, Engine};
    use chromiumoxide::cdp::js_protocol::runtime::EvaluateParams;

    /// Upper bound on how long a single automation step may run.
    const STEP_TIMEOUT: tokio::time::Duration = tokio::time::Duration::from_secs(60);

    /// Evaluation parameters used to install a script (function fallback enabled).
    fn install_params(expr: &str) -> EvaluateParams {
        let builder = EvaluateParams::builder()
            .expression(expr)
            .eval_as_function_fallback(true);
        builder.build().expect("eval_expr build failed")
    }

    /// Evaluation parameters that ask for the result to be returned by value.
    fn by_value_params(expr: &str) -> EvaluateParams {
        let builder = EvaluateParams::builder()
            .expression(expr)
            .return_by_value(true);
        builder.build().expect("eval_value build failed")
    }

    /// Screenshot settings shared by every step: full-page PNG with the background kept.
    fn screenshot_params() -> chromiumoxide::page::ScreenshotParams {
        chromiumoxide::page::ScreenshotParams::builder()
            .format(chromiumoxide::cdp::browser_protocol::page::CaptureScreenshotFormat::Png)
            .full_page(true)
            .quality(45)
            .omit_background(false)
            .build()
    }

    /// Evaluate the drain script and return its value, or `None` when the evaluation fails
    /// or yields no value.
    async fn drain_changes(
        page: &chromiumoxide::Page,
        drain_js: &str,
    ) -> Option<serde_json::Value> {
        let evaluated = page.evaluate(by_value_params(drain_js)).await.ok()?;
        evaluated.value().cloned()
    }

    // Swap the placeholder key baked into both scripts for a per-call generated one.
    let symbol_key = gen_symbol_key();
    let install_js = INSTALL_OBSERVER_JS.replacen(OBSERVER_SYMBOL_KEY, &symbol_key, 1);
    let drain_js = DRAIN_CHANGES_JS.replacen(OBSERVER_SYMBOL_KEY, &symbol_key, 1);

    // Install the recorder, then discard whatever it captured so far. Failures are ignored.
    let _ = page.evaluate(install_params(&install_js)).await;
    let _ = page.evaluate(by_value_params(&drain_js)).await;

    // Prefer scripts matched for this URL; fall back to the root entry when `match_all` is set.
    let selected: Option<&Vec<WebAutomation>> = match automation_scripts {
        Some(script_map) => match script_map.search(target_url) {
            Some(matched) => Some(matched),
            None if script_map.match_all => script_map.root.value.as_ref(),
            None => None,
        },
        None => None,
    };

    let steps = match selected {
        Some(steps) => steps,
        None => return,
    };

    // Return value of the most recently completed step; a timed-out step leaves it untouched.
    let mut chain_ok = false;

    for (position, step) in steps.iter().enumerate() {
        if !chain_ok && matches!(step, WebAutomation::ValidateChain) {
            break;
        }

        let step_label = format!("step_{}_{}", position + 1, step.label());
        let outcome = tokio::time::timeout(STEP_TIMEOUT, step.run(page)).await;

        let mut record = crate::page::AutomationResults {
            input: format!("automation:{} url:{}", step_label, target_url),
            ..Default::default()
        };

        // Note the step outcome first; the page capture below is identical for both cases.
        let timed_out = match outcome {
            Ok(next) => {
                chain_ok = next;
                None
            }
            Err(elapsed) => {
                record.error = Some(format!(
                    "Script execution timed out for: {} - {}",
                    target_url, elapsed
                ));
                Some(elapsed)
            }
        };

        if let Some(changes) = drain_changes(page, &drain_js).await {
            record.content_output = changes;
        }
        if let Ok(image) = page.screenshot(screenshot_params()).await {
            record.screenshot_output = Some(STANDARD.encode(&image));
        }
        if let Some(elapsed) = timed_out {
            log::warn!("Script execution timed out for: {target_url} - {elapsed}");
        }

        automation_data.push(record);
    }
}