Skip to main content

spider/features/
chrome_common.rs

1use crate::utils::trie::Trie;
2
3#[cfg(feature = "chrome")]
4use chromiumoxide::handler::blockers::NetworkInterceptManager;
5
/// Placeholder intercept manager used when the `chrome` feature is disabled.
/// It holds no state and does nothing.
#[cfg(not(feature = "chrome"))]
#[derive(Debug, Default, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum NetworkInterceptManager {
    /// Unknown: the only variant, and the default.
    #[default]
    Unknown,
}
15
16#[cfg(not(feature = "chrome"))]
17impl NetworkInterceptManager {
18    /// a custom intercept handle.
19    pub fn new(_url: &Option<Box<url::Url>>) -> NetworkInterceptManager {
20        NetworkInterceptManager::Unknown
21    }
22}
23
/// Wait for network request. This does nothing without the `chrome` flag enabled.
#[derive(Debug, Default, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct WaitForIdleNetwork {
    /// Upper bound on how long to wait for the network. A value around 30s is
    /// recommended; `None` removes the timeout.
    pub timeout: Option<core::time::Duration>,
}

impl WaitForIdleNetwork {
    /// Build a `WaitForIdleNetwork` with the given optional timeout.
    pub fn new(timeout: Option<core::time::Duration>) -> Self {
        WaitForIdleNetwork { timeout }
    }
}
38
/// Wait for a selector with optional timeout. This does nothing without the `chrome` flag enabled.
#[derive(Debug, Default, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct WaitForSelector {
    /// Upper bound on how long to wait for the selector. A value around 30s is
    /// recommended; `None` removes the timeout.
    pub timeout: Option<core::time::Duration>,
    /// The selector to wait for.
    pub selector: String,
}

impl WaitForSelector {
    /// Build a `WaitForSelector` from an optional timeout and the selector to wait for.
    pub fn new(timeout: Option<core::time::Duration>, selector: String) -> Self {
        WaitForSelector { timeout, selector }
    }
}
55
/// Wait for with a delay. Should only be used for testing purposes. This does nothing without the `chrome` flag enabled.
#[derive(Debug, Default, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct WaitForDelay {
    /// The max time to wait. A value around 30s is recommended; `None` removes
    /// the timeout.
    pub timeout: Option<core::time::Duration>,
}

impl WaitForDelay {
    /// Build a `WaitForDelay` with the given optional timeout.
    pub fn new(timeout: Option<core::time::Duration>) -> Self {
        WaitForDelay { timeout }
    }
}
70
71#[derive(Debug, Default, Clone, PartialEq)]
72#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
73/// The wait for options for the page. Multiple options can be set. This does nothing without the `chrome` flag enabled.
74pub struct WaitFor {
75    /// The max time to wait for the selector.
76    pub selector: Option<WaitForSelector>,
77    /// Wait for network request to be idle within a time frame period (500ms no network connections). This does nothing without the `chrome` flag enabled.
78    pub idle_network: Option<WaitForIdleNetwork>,
79    /// Wait for network request with a max timeout. This does nothing without the `chrome` flag enabled.
80    pub idle_network0: Option<WaitForIdleNetwork>,
81    /// Wait for network to be almost idle with a max timeout. This does nothing without the `chrome` flag enabled.
82    pub almost_idle_network0: Option<WaitForIdleNetwork>,
83    /// Wait for delay. Should only be used for testing.
84    pub delay: Option<WaitForDelay>,
85    /// Wait for dom element to stop updating.
86    pub dom: Option<WaitForSelector>,
87    #[cfg_attr(feature = "serde", serde(default))]
88    /// Wait for page navigations.
89    pub page_navigations: bool,
90}
91
92impl WaitFor {
93    /// Create new WaitFor with timeout.
94    pub fn new(
95        timeout: Option<core::time::Duration>,
96        delay: Option<WaitForDelay>,
97        page_navigations: bool,
98        idle_network: bool,
99        selector: Option<String>,
100        dom: Option<WaitForSelector>,
101    ) -> Self {
102        Self {
103            page_navigations,
104            idle_network: if idle_network {
105                Some(WaitForIdleNetwork::new(timeout))
106            } else {
107                None
108            },
109            idle_network0: None,
110            almost_idle_network0: None,
111            selector: if selector.is_some() {
112                Some(WaitForSelector::new(timeout, selector.unwrap_or_default()))
113            } else {
114                None
115            },
116            delay,
117            dom,
118        }
119    }
120}
121
122#[derive(
123    Debug, Clone, PartialEq, Eq, Hash, Default, strum::EnumString, strum::Display, strum::AsRefStr,
124)]
125#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
126/// Capture screenshot options for chrome.
127pub enum CaptureScreenshotFormat {
128    #[cfg_attr(feature = "serde", serde(rename = "jpeg"))]
129    /// jpeg format
130    Jpeg,
131    #[cfg_attr(feature = "serde", serde(rename = "png"))]
132    #[default]
133    /// png format
134    Png,
135    #[cfg_attr(feature = "serde", serde(rename = "webp"))]
136    /// webp format
137    Webp,
138}
139
#[cfg(feature = "chrome")]
impl From<CaptureScreenshotFormat>
    for chromiumoxide::cdp::browser_protocol::page::CaptureScreenshotFormat
{
    /// Map each local format variant onto the chromiumoxide variant of the same name.
    fn from(format: CaptureScreenshotFormat) -> Self {
        match format {
            CaptureScreenshotFormat::Jpeg => Self::Jpeg,
            CaptureScreenshotFormat::Png => Self::Png,
            CaptureScreenshotFormat::Webp => Self::Webp,
        }
    }
}
158
/// View port handling for chrome.
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Viewport {
    /// Device screen width.
    pub width: u32,
    /// Device screen height.
    pub height: u32,
    /// Device scale factor, if any.
    pub device_scale_factor: Option<f64>,
    /// Whether a mobile device is being emulated.
    pub emulating_mobile: bool,
    /// Use landscape mode instead of portrait.
    pub is_landscape: bool,
    /// Whether the device has a touch screen.
    pub has_touch: bool,
}

impl Default for Viewport {
    /// An 800x600 viewport with no scale factor and every flag turned off.
    fn default() -> Self {
        Self {
            width: 800,
            height: 600,
            device_scale_factor: None,
            emulating_mobile: false,
            is_landscape: false,
            has_touch: false,
        }
    }
}

impl Viewport {
    /// Create a viewport with the given width and height; all other fields take
    /// their default values.
    pub fn new(width: u32, height: u32) -> Self {
        Self {
            width,
            height,
            ..Self::default()
        }
    }

    /// Set whether a mobile device should be emulated.
    pub fn set_mobile(&mut self, emulating_mobile: bool) {
        self.emulating_mobile = emulating_mobile;
    }

    /// Set whether the layout is emulated in landscape view.
    pub fn set_landscape(&mut self, is_landscape: bool) {
        self.is_landscape = is_landscape;
    }

    /// Set whether the emulated device has a touch screen.
    pub fn set_touch(&mut self, has_touch: bool) {
        self.has_touch = has_touch;
    }

    /// Set the scale factor for the screen layout.
    pub fn set_scale_factor(&mut self, device_scale_factor: Option<f64>) {
        self.device_scale_factor = device_scale_factor;
    }
}
216
#[cfg(feature = "chrome")]
impl From<Viewport> for chromiumoxide::handler::viewport::Viewport {
    /// Copy every field across, one to one, into the chromiumoxide viewport.
    fn from(viewport: Viewport) -> Self {
        let Viewport {
            width,
            height,
            device_scale_factor,
            emulating_mobile,
            is_landscape,
            has_touch,
        } = viewport;

        Self {
            width,
            height,
            device_scale_factor,
            emulating_mobile,
            is_landscape,
            has_touch,
        }
    }
}
230
231impl From<Viewport> for spider_fingerprint::spoof_viewport::Viewport {
232    fn from(viewport: Viewport) -> Self {
233        Self {
234            width: viewport.width,
235            height: viewport.height,
236            device_scale_factor: viewport.device_scale_factor,
237            emulating_mobile: viewport.emulating_mobile,
238            is_landscape: viewport.is_landscape,
239            has_touch: viewport.has_touch,
240        }
241    }
242}
243
244#[doc = "Capture page screenshot.\n[captureScreenshot](https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-captureScreenshot)"]
245#[derive(Debug, Clone, Default, PartialEq)]
246#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
247pub struct CaptureScreenshotParams {
248    #[doc = "Image compression format (defaults to png)."]
249    pub format: Option<CaptureScreenshotFormat>,
250    #[doc = "Compression quality from range [0..100] (jpeg only)."]
251    pub quality: Option<i64>,
252    #[doc = "Capture the screenshot of a given region only."]
253    pub clip: Option<ClipViewport>,
254    #[doc = "Capture the screenshot from the surface, rather than the view. Defaults to true."]
255    pub from_surface: Option<bool>,
256    #[doc = "Capture the screenshot beyond the viewport. Defaults to false."]
257    pub capture_beyond_viewport: Option<bool>,
258}
259
/// The view port clip for screenshots.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ClipViewport {
    /// X offset in device independent pixels (dip).
    #[cfg_attr(feature = "serde", serde(rename = "x"))]
    pub x: f64,
    /// Y offset in device independent pixels (dip).
    #[cfg_attr(feature = "serde", serde(rename = "y"))]
    pub y: f64,
    /// Rectangle width in device independent pixels (dip).
    #[cfg_attr(feature = "serde", serde(rename = "width"))]
    pub width: f64,
    /// Rectangle height in device independent pixels (dip).
    #[cfg_attr(feature = "serde", serde(rename = "height"))]
    pub height: f64,
    /// Page scale factor.
    #[cfg_attr(feature = "serde", serde(rename = "scale"))]
    pub scale: f64,
}
280
#[cfg(feature = "chrome")]
impl From<ClipViewport> for chromiumoxide::cdp::browser_protocol::page::Viewport {
    /// Copy the clip rectangle and scale, field for field, into the CDP viewport.
    fn from(viewport: ClipViewport) -> Self {
        let ClipViewport {
            x,
            y,
            width,
            height,
            scale,
        } = viewport;

        Self {
            x,
            y,
            height,
            width,
            scale,
        }
    }
}
293
294/// Screenshot configuration.
295#[derive(Debug, Default, Clone, PartialEq)]
296#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
297pub struct ScreenShotConfig {
298    /// The screenshot params.
299    pub params: ScreenshotParams,
300    /// Return the bytes of the screenshot on the Page.
301    pub bytes: bool,
302    /// Store the screenshot to disk. This can be used with output_dir. If disabled will not store the file to the output directory.
303    pub save: bool,
304    /// The output directory to store the file. Parant folders may be created inside the directory.
305    pub output_dir: Option<std::path::PathBuf>,
306}
307
308impl ScreenShotConfig {
309    /// Create a new screenshot configuration.
310    pub fn new(
311        params: ScreenshotParams,
312        bytes: bool,
313        save: bool,
314        output_dir: Option<std::path::PathBuf>,
315    ) -> Self {
316        Self {
317            params,
318            bytes,
319            save,
320            output_dir,
321        }
322    }
323}
324
325/// The screenshot params for the page.
326#[derive(Default, Debug, Clone, PartialEq)]
327#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
328pub struct ScreenshotParams {
329    /// Chrome DevTools Protocol screenshot options.
330    pub cdp_params: CaptureScreenshotParams,
331    /// Take full page screenshot.
332    pub full_page: Option<bool>,
333    /// Make the background transparent (png only).
334    pub omit_background: Option<bool>,
335}
336
337impl ScreenshotParams {
338    /// Create a new ScreenshotParams.
339    pub fn new(
340        cdp_params: CaptureScreenshotParams,
341        full_page: Option<bool>,
342        omit_background: Option<bool>,
343    ) -> Self {
344        Self {
345            cdp_params,
346            full_page,
347            omit_background,
348        }
349    }
350}
351
#[cfg(feature = "chrome")]
impl From<ScreenshotParams> for chromiumoxide::page::ScreenshotParams {
    /// Convert the crate-level screenshot params into chromiumoxide's params.
    ///
    /// Fallbacks applied when a value is not set on `params`:
    /// * `full_page` - `true` only if the `SCREENSHOT_FULL_PAGE` env var equals `"true"`.
    /// * `omit_background` - `SCREENSHOT_OMIT_BACKGROUND == "true"` if that env var
    ///   can be read, otherwise `true`.
    /// * `format` - png.
    ///
    /// `quality`, `clip`, `capture_beyond_viewport` and `from_surface` are only
    /// passed to the builder when they are `Some`.
    fn from(params: ScreenshotParams) -> Self {
        use chromiumoxide::cdp::browser_protocol::page::{
            CaptureScreenshotFormat as CdpFormat, Viewport as CdpViewport,
        };

        let ScreenshotParams {
            cdp_params,
            full_page,
            omit_background,
        } = params;

        // The env vars are only consulted when the caller left the option unset.
        let full_page = full_page.unwrap_or_else(|| {
            std::env::var("SCREENSHOT_FULL_PAGE").unwrap_or_default() == "true"
        });
        let omit_background = omit_background.unwrap_or_else(|| {
            std::env::var("SCREENSHOT_OMIT_BACKGROUND").map_or(true, |value| value == "true")
        });
        let format = cdp_params.format.map_or(CdpFormat::Png, CdpFormat::from);

        let mut builder = chromiumoxide::page::ScreenshotParams::builder()
            .format(format)
            .full_page(full_page)
            .omit_background(omit_background);

        if let Some(quality) = cdp_params.quality {
            builder = builder.quality(quality);
        }
        if let Some(clip) = cdp_params.clip {
            builder = builder.clip(CdpViewport::from(clip));
        }
        if let Some(capture_beyond_viewport) = cdp_params.capture_beyond_viewport {
            builder = builder.capture_beyond_viewport(capture_beyond_viewport);
        }
        if let Some(from_surface) = cdp_params.from_surface {
            builder = builder.from_surface(from_surface);
        }

        builder.build()
    }
}
422
/// The decision on what to do in response to the authorization challenge. Default means
/// deferring to the default behavior of the net stack, which will likely either cancel the
/// authentication or display a popup dialog box.
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum AuthChallengeResponseResponse {
    /// The default.
    #[default]
    Default,
    /// Cancel the authentication prompt.
    CancelAuth,
    /// Provide credentials.
    ProvideCredentials,
}
435
436#[doc = "Response to an AuthChallenge.\n[AuthChallengeResponse](https://chromedevtools.github.io/devtools-protocol/tot/Fetch/#type-AuthChallengeResponse)"]
437#[derive(Default, Debug, Clone, PartialEq)]
438#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
439pub struct AuthChallengeResponse {
440    #[doc = "The decision on what to do in response to the authorization challenge.  Default means\ndeferring to the default behavior of the net stack, which will likely either the Cancel\nauthentication or display a popup dialog box."]
441    pub response: AuthChallengeResponseResponse,
442    #[doc = "The username to provide, possibly empty. Should only be set if response is\nProvideCredentials."]
443    pub username: Option<String>,
444    #[doc = "The password to provide, possibly empty. Should only be set if response is\nProvideCredentials."]
445    pub password: Option<String>,
446}
447
#[cfg(feature = "chrome")]
impl From<AuthChallengeResponse>
    for chromiumoxide::cdp::browser_protocol::fetch::AuthChallengeResponse
{
    /// Translate the decision into the CDP variant of the same name and move the
    /// credentials across unchanged.
    fn from(auth_challenge_response: AuthChallengeResponse) -> Self {
        use chromiumoxide::cdp::browser_protocol::fetch::AuthChallengeResponseResponse as CdpDecision;

        let AuthChallengeResponse {
            response,
            username,
            password,
        } = auth_challenge_response;

        let response = match response {
            AuthChallengeResponseResponse::Default => CdpDecision::Default,
            AuthChallengeResponseResponse::CancelAuth => CdpDecision::CancelAuth,
            AuthChallengeResponseResponse::ProvideCredentials => CdpDecision::ProvideCredentials,
        };

        Self {
            response,
            username,
            password,
        }
    }
}
465
/// Represents various web automation actions.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum WebAutomation {
    /// Runs custom JavaScript code.
    Evaluate(String),
    /// Clicks on the element matching the selector.
    Click(String),
    /// Clicks on every element matching the selector.
    ClickAll(String),
    /// Clicks at the given x and y coordinates.
    ClickPoint {
        /// The horizontal (X) coordinate.
        x: f64,
        /// The vertical (Y) coordinate.
        y: f64,
    },
    /// Click and hold on an element (uses the element clickable point).
    ClickHold {
        /// The selector of the element to click-hold.
        selector: String,
        /// How long to hold (ms).
        hold_ms: u64,
    },
    /// Click and hold at a specific point.
    ClickHoldPoint {
        /// The horizontal (X) coordinate.
        x: f64,
        /// The vertical (Y) coordinate.
        y: f64,
        /// How long to hold (ms).
        hold_ms: u64,
    },
    /// Click-and-drag from one element to another (uses clickable points).
    ClickDrag {
        /// Selector of the element where the drag starts.
        from: String,
        /// Selector of the element where the drag ends.
        to: String,
        /// Optional modifier (e.g. 8 for Shift). If None, no modifier.
        modifier: Option<i64>,
    },
    /// Click-and-drag from one point to another.
    ClickDragPoint {
        /// Start X
        from_x: f64,
        /// Start Y
        from_y: f64,
        /// End X
        to_x: f64,
        /// End Y
        to_y: f64,
        /// Optional modifier (e.g. 8 for Shift). If None, no modifier.
        modifier: Option<i64>,
    },
    /// Clicks on all clickable elements; takes no selector.
    ClickAllClickable(),
    /// Waits for a fixed duration in milliseconds.
    Wait(u64),
    /// Waits for the next navigation event.
    WaitForNavigation,
    /// Wait for dom updates to stop.
    WaitForDom {
        /// The selector of the element to watch for updates, if any.
        selector: Option<String>,
        /// The timeout to wait for in ms.
        timeout: u32,
    },
    /// Waits for an element to appear.
    WaitFor(String),
    /// Waits for an element to appear with a timeout.
    WaitForWithTimeout {
        /// The selector of the element to wait for.
        selector: String,
        /// The timeout to wait for in ms.
        timeout: u64,
    },
    /// Waits for an element to appear and then clicks on it.
    WaitForAndClick(String),
    /// Scrolls the screen in the horizontal axis by a specified amount in pixels.
    ScrollX(i32),
    /// Scrolls the screen in the vertical axis by a specified amount in pixels.
    ScrollY(i32),
    /// Fills an input element with a specified value.
    Fill {
        /// The selector of the input element to fill.
        selector: String,
        /// The value to fill the input element with.
        value: String,
    },
    /// Type with the keyboard.
    Type {
        /// The text to type.
        value: String,
        /// The modifier to use for the key.
        modifier: Option<i64>,
    },
    /// Scrolls the page until the end.
    InfiniteScroll(u32),
    /// Perform a screenshot on the page - fullscreen and omit background for params.
    Screenshot {
        /// Take a full page screenshot.
        full_page: bool,
        /// Omit the background.
        omit_background: bool,
        /// The output file to store the screenshot.
        output: String,
    },
    /// Only continue to the next automation if the prior step was valid. Use this intermediate after a step to break out of the chain.
    ValidateChain,
}
578
579impl WebAutomation {
580    /// Machine-friendly variant name (no params).
581    pub fn name(&self) -> &'static str {
582        use WebAutomation::*;
583        match self {
584            Evaluate(_) => "Evaluate",
585            Click(_) => "Click",
586            ClickDrag { .. } => "ClickDrag",
587            ClickDragPoint { .. } => "ClickDragPoint",
588            ClickHold { .. } => "ClickHold",
589            ClickHoldPoint { .. } => "ClickHoldPoint",
590            ClickAll(_) => "ClickAll",
591            ClickAllClickable() => "ClickAllClickable",
592            ClickPoint { .. } => "ClickPoint",
593            Wait(_) => "Wait",
594            WaitForNavigation => "WaitForNavigation",
595            WaitForDom { .. } => "WaitForDom",
596            WaitFor(_) => "WaitFor",
597            WaitForWithTimeout { .. } => "WaitForWithTimeout",
598            WaitForAndClick(_) => "WaitForAndClick",
599            ScrollX(_) => "ScrollX",
600            ScrollY(_) => "ScrollY",
601            Fill { .. } => "Fill",
602            Type { .. } => "Type",
603            InfiniteScroll(_) => "InfiniteScroll",
604            Screenshot { .. } => "Screenshot",
605            ValidateChain => "ValidateChain",
606        }
607    }
608
609    /// Optional: human-friendly label with key params.
610    pub fn label(&self) -> String {
611        use WebAutomation::*;
612        match self {
613            Evaluate(_) => "Evaluate JS".into(),
614            Click(s) => format!("Click {}", s),
615            ClickHold { selector, hold_ms } => {
616                format!("ClickHold {} ({}ms)", selector, hold_ms)
617            }
618            ClickHoldPoint { x, y, hold_ms } => {
619                format!("ClickHoldPoint x:{} y:{} ({}ms)", x, y, hold_ms)
620            }
621            ClickDrag { from, to, modifier } => {
622                format!("ClickDrag {} -> {} modifier={:?}", from, to, modifier)
623            }
624            ClickDragPoint {
625                from_x,
626                from_y,
627                to_x,
628                to_y,
629                modifier,
630            } => format!(
631                "ClickDragPoint ({},{}) -> ({},{}) modifier={:?}",
632                from_x, from_y, to_x, to_y, modifier
633            ),
634            ClickAll(s) => format!("ClickAll {}", s),
635            ClickAllClickable() => "ClickAllClickable".into(),
636            Wait(ms) => format!("Wait {}ms", ms),
637            WaitForNavigation => "WaitForNavigation".into(),
638            ClickPoint { x, y } => {
639                format!("ClickPoint x:{} y:{}", x, y)
640            }
641            WaitForDom { selector, timeout } => selector
642                .as_ref()
643                .map(|s| format!("WaitForDom {} ({}ms)", s, timeout))
644                .unwrap_or_else(|| format!("WaitForDom ({}ms)", timeout)),
645            WaitFor(s) => format!("WaitFor {}", s),
646            WaitForWithTimeout { selector, timeout } => {
647                format!("WaitForWithTimeout {} ({}ms)", selector, timeout)
648            }
649            WaitForAndClick(s) => format!("WaitForAndClick {}", s),
650            ScrollX(dx) => format!("ScrollX {}", dx),
651            ScrollY(dy) => format!("ScrollY {}", dy),
652            Fill { selector, .. } => format!("Fill {}", selector),
653            Type { value, modifier } => format!("Type {} modifier={:?}", value, modifier),
654            InfiniteScroll(n) => format!("InfiniteScroll {}", n),
655            Screenshot {
656                full_page,
657                omit_background,
658                output,
659            } => format!(
660                "Screenshot full={} omit_bg={} -> {}",
661                full_page, omit_background, output
662            ),
663            ValidateChain => "ValidateChain".into(),
664        }
665    }
666}
667
668impl core::fmt::Display for WebAutomation {
669    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
670        f.write_str(&self.label())
671    }
672}
673
#[cfg(feature = "chrome")]
/// Generate the JavaScript that waits for the DOM under a target element to settle.
///
/// The script is an immediately-invoked expression producing a Promise that resolves
/// to `true` once the element has been quiet for long enough, or `false` on timeout.
///
/// * `timeout_ms` - overall budget, capped at `crate::utils::FIVE_MINUTES`.
/// * `selector` - element to watch; an empty string means `"body"`. If the selector
///   matches nothing the script tries `#__next`, `#__nuxt`, `#app`, `#root`, `main`
///   and `body` in that order.
/// * `quiet_ms` - how long without observed mutations counts as quiet; raised to at
///   least 50 and then capped at the timeout.
/// * `stable_frames` - number of consecutive quiet animation frames required; kept
///   within 1..=10.
/// * `require_visible` - also require the element to be displayed with a non-empty box.
/// * `require_images_complete` - also require every `img` inside the element to be
///   complete.
// NOTE(review): after resolving `true` the script still calls `requestAnimationFrame`
// again, so the frame loop appears to keep running until the timeout elapses - confirm
// whether that is intended.
pub(crate) fn generate_wait_for_dom_js_v2(
    timeout_ms: u32,
    selector: &str,
    quiet_ms: u32,
    stable_frames: u32,
    require_visible: bool,
    require_images_complete: bool,
) -> String {
    let budget_ms = timeout_ms.min(crate::utils::FIVE_MINUTES);
    // max-then-min rather than `clamp`: the budget may be below 50ms and must win.
    let settle_ms = quiet_ms.max(50).min(budget_ms);
    let frames = stable_frames.clamp(1, 10);
    let target = if selector.is_empty() { "body" } else { selector };
    let visible_flag = if require_visible { "true" } else { "false" };
    let images_flag = if require_images_complete {
        "true"
    } else {
        "false"
    };

    format!(
        r###"(()=>new Promise(R=>{{const S={s:?},T={t},Q={q},F={f},V={vis},I={img},P=["#__next","#__nuxt","#app","#root","main","body"],N=()=>performance.now(),W=e=>{{if(!e)return!1;const t=getComputedStyle(e);if("none"===t.display||"hidden"===t.visibility||"0"===t.opacity)return!1;const r=e.getBoundingClientRect();return r.width>0&&r.height>0}},M=e=>{{if(!e)return!1;const t=e.querySelectorAll("img");for(let e=0;e<t.length;e++){{const r=t[e];if(!r.complete)return!1;if(0===r.naturalWidth&&0===r.naturalHeight&&r.currentSrc)return!1}}return!0}},k=()=>{{let e=document.querySelector(S);if(e)return e;for(let t=0;t<P.length;t++){{if(e=document.querySelector(P[t]))return e}}return null}},s=N();let e=null,t=null,r=s,o=0;const a=n=>{{t&&t.disconnect(),t=new MutationObserver(()=>{{r=N(),o=0}}),t.observe(n,{{subtree:!0,childList:!0,attributes:!0,characterData:!0}})}},i=()=>{{const n=N();if(n-s>=T)return t&&t.disconnect(),void R(!1);(!e||!document.contains(e))&&(e=k())&&((r=n,o=0),a(e));e&&(V&&!W(e)?o=0:I&&!M(e)?o=0:n-r>=Q?(o++,o>=F&&(t&&t.disconnect(),R(!0))):o=0),requestAnimationFrame(i)}};i()}}))()"###,
        s = target,
        t = budget_ms,
        q = settle_ms,
        f = frames,
        vis = visible_flag,
        img = images_flag,
    )
}
706
#[cfg(feature = "chrome")]
/// Generate the JavaScript that waits for DOM updates under an element to stop.
///
/// The script is a Promise that polls every 50ms for the element, then watches it
/// with a `MutationObserver`. It resolves `true` once 220ms pass with no observed
/// mutation, and rejects with `Dom Timeout.` when the timeout fires first.
///
/// * `timeout` - timeout in ms, capped at `crate::utils::FIVE_MINUTES`.
/// * `selector` - element to watch; `None` means `"body"`.
///
/// The selector is written with `{:?}` so quotes and backslashes are escaped inside
/// the generated string literal. Interpolating it raw into `'...'` broke the script
/// for any selector containing a single quote, e.g. `input[type='submit']`.
// NOTE(review): the timeout timer is only armed after the element is found, so the
// promise looks like it never settles if the selector never matches - confirm that
// callers apply their own outer timeout.
pub(crate) fn generate_wait_for_dom_js_code_with_selector(
    timeout: u32,
    selector: Option<&str>,
) -> String {
    let t = timeout.min(crate::utils::FIVE_MINUTES);
    let s = selector.unwrap_or("body");
    format!(
        "new Promise((r,j)=>{{const s={s:?},t={t},i=220,n=50;let l=Date.now(),el,o,d,c;const check=()=>{{el=document.querySelector(s);if(!el)return;clearInterval(wait);l=Date.now();o=new MutationObserver(()=>{{l=Date.now();}});o.observe(el,{{childList:!0,subtree:!0,attributes:!0,characterData:!0}});d=setTimeout(()=>{{clearInterval(c),o.disconnect(),j(new Error('Dom Timeout.'))}},t);c=setInterval(()=>{{Date.now()-l>=i&&(clearTimeout(d),clearInterval(c),o.disconnect(),r(!0))}},n);}};const wait=setInterval(check,n);check();}});",
        s = s,
        t = t
    )
}
721
722// #[cfg(feature = "chrome")]
723// /// Generate the wait for Dom function targeting the element. This defaults to using the body.
724// pub(crate) fn generate_wait_for_dom_js_code_with_selector_base(
725//     timeout: u32,
726//     selector: &str,
727// ) -> String {
728//     generate_wait_for_dom_js_code_with_selector(
729//         timeout,
730//         if selector.is_empty() {
731//             None
732//         } else {
733//             Some(selector)
734//         },
735//     )
736// }
737
#[cfg(feature = "chrome")]
/// CSS selector list for elements treated as clickable: enabled buttons and
/// button-like inputs, checkboxes and radios, labels with `for`, `summary`, enabled
/// `select`/`textarea`, `role='button'` elements not marked aria-disabled, anything
/// with `onclick` or that is content-editable, and, as a last resort, focusable
/// `[tabindex]` elements excluding negative tabindex values and links with `href`.
///
/// Written as one literal; each trailing `\` joins the next line with no separator.
const CLICKABLE_SELECTOR: &str = "button:not([disabled]),\
    input[type='button']:not([disabled]),\
    input[type='submit']:not([disabled]),\
    input[type='reset']:not([disabled]),\
    input[type='checkbox']:not([disabled]),\
    input[type='radio']:not([disabled]),\
    label[for],\
    summary,\
    select:not([disabled]),\
    textarea:not([disabled]),\
    [role='button']:not([aria-disabled='true']),\
    [onclick],\
    [contenteditable=''],[contenteditable='true'],\
    [tabindex]:not([tabindex^='-'],a[href])";
756
757impl WebAutomation {
758    #[cfg(feature = "chrome")]
759    /// Run the web automation step.
760    pub async fn run(&self, page: &chromiumoxide::Page) -> bool {
761        use crate::utils::wait_for_selector;
762        use std::time::Duration;
763
764        let mut valid = false;
765
766        match self {
767            WebAutomation::Evaluate(js) => {
768                valid = page.evaluate(js.as_str()).await.is_ok();
769            }
770            WebAutomation::Click(selector) => {
771                if let Ok(ele) = page.find_element(selector).await {
772                    valid = ele.click().await.is_ok();
773                }
774            }
775
776            WebAutomation::WaitForWithTimeout { selector, timeout } => {
777                valid =
778                    wait_for_selector(page, Some(Duration::from_millis(*timeout)), selector).await;
779            }
780            WebAutomation::Wait(ms) => {
781                tokio::time::sleep(Duration::from_millis(*ms)).await;
782                valid = true;
783            }
784            WebAutomation::ClickHold { selector, hold_ms } => {
785                if let Ok(ele) = page.find_element(selector).await {
786                    if let Ok(pt) = ele.clickable_point().await {
787                        valid = page
788                            .click_and_hold(pt, Duration::from_millis(*hold_ms))
789                            .await
790                            .is_ok();
791                    }
792                }
793            }
794            WebAutomation::ClickHoldPoint { x, y, hold_ms } => {
795                let pt = chromiumoxide::layout::Point { x: *x, y: *y };
796                valid = page
797                    .click_and_hold(pt, Duration::from_millis(*hold_ms))
798                    .await
799                    .is_ok();
800            }
801            WebAutomation::ClickAll(selector) => {
802                if let Ok(eles) = page.find_elements(selector).await {
803                    for ele in eles {
804                        valid = ele.click().await.is_ok();
805                    }
806                }
807            }
808            WebAutomation::ClickDrag { from, to, modifier } => {
809                if let (Ok(from_el), Ok(to_el)) =
810                    (page.find_element(from).await, page.find_element(to).await)
811                {
812                    if let (Ok(p1), Ok(p2)) = (
813                        from_el.clickable_point().await,
814                        to_el.clickable_point().await,
815                    ) {
816                        valid = match modifier {
817                            Some(m) => page.click_and_drag_with_modifier(p1, p2, *m).await.is_ok(),
818                            None => page.click_and_drag(p1, p2).await.is_ok(),
819                        };
820                    }
821                }
822            }
823
824            WebAutomation::ClickDragPoint {
825                from_x,
826                from_y,
827                to_x,
828                to_y,
829                modifier,
830            } => {
831                let p1 = chromiumoxide::layout::Point {
832                    x: *from_x,
833                    y: *from_y,
834                };
835                let p2 = chromiumoxide::layout::Point { x: *to_x, y: *to_y };
836
837                valid = match modifier {
838                    Some(m) => page.click_and_drag_with_modifier(p1, p2, *m).await.is_ok(),
839                    None => page.click_and_drag(p1, p2).await.is_ok(),
840                };
841            }
842            WebAutomation::ClickAllClickable() => {
843                if let Ok(eles) = page.find_elements(CLICKABLE_SELECTOR).await {
844                    for ele in eles {
845                        valid = ele.click().await.is_ok();
846                    }
847                }
848            }
849            WebAutomation::ClickPoint { x, y } => {
850                valid = page
851                    .click(chromiumoxide::layout::Point { x: *x, y: *y })
852                    .await
853                    .is_ok();
854            }
855            WebAutomation::WaitForDom { selector, timeout } => {
856                valid = page
857                    .evaluate(
858                        generate_wait_for_dom_js_code_with_selector(*timeout, selector.as_deref())
859                            .as_str(),
860                    )
861                    .await
862                    .is_ok();
863            }
864            WebAutomation::WaitFor(selector) => {
865                valid = wait_for_selector(page, Some(Duration::from_secs(60)), selector).await;
866            }
867            WebAutomation::WaitForNavigation => {
868                valid = page.wait_for_navigation().await.is_ok();
869            }
870            WebAutomation::WaitForAndClick(selector) => {
871                valid = wait_for_selector(page, Some(Duration::from_secs(60)), selector).await;
872                if let Ok(ele) = page.find_element(selector).await {
873                    valid = ele.click().await.is_ok();
874                }
875            }
876            WebAutomation::ScrollX(px) => {
877                valid = page
878                    .scroll_by((*px as f32).into(), 0.0, Default::default())
879                    .await
880                    .is_ok()
881            }
882            WebAutomation::ScrollY(px) => {
883                valid = page
884                    .scroll_by(0.0, (*px as f32).into(), Default::default())
885                    .await
886                    .is_ok()
887            }
888            WebAutomation::Fill { selector, value } => {
889                if let Ok(ele) = page.find_element(selector).await {
890                    if let Ok(el) = ele.click().await {
891                        valid = el.type_str(value).await.is_ok();
892                    }
893                }
894            }
895            WebAutomation::Type { value, modifier } => {
896                valid = page.type_str_with_modifier(value, *modifier).await.is_ok()
897            }
898            WebAutomation::InfiniteScroll(duration) => {
899                valid = page.evaluate(set_dynamic_scroll(*duration)).await.is_ok();
900            }
901            WebAutomation::Screenshot {
902                full_page,
903                omit_background,
904                output,
905            } => {
906                let mut cdp_params: CaptureScreenshotParams = CaptureScreenshotParams::default();
907                cdp_params.format = Some(CaptureScreenshotFormat::Png);
908
909                let screenshot_params =
910                    ScreenshotParams::new(cdp_params, Some(*full_page), Some(*omit_background));
911
912                valid = page
913                    .save_screenshot(screenshot_params, output)
914                    .await
915                    .is_ok();
916            }
917            _ => (),
918        };
919
920        valid
921    }
922}
923
924/// Set a dynamic time to scroll.
925pub fn set_dynamic_scroll(timeout: u32) -> String {
926    let timeout = timeout.min(crate::utils::FIVE_MINUTES);
927    let s = string_concat!(
928        r###"document.addEventListener('DOMContentLoaded',e=>{let t=null,o=null,n="###,
929        timeout.to_string(),
930        r###",a=Date.now(),i=Date.now(),r=()=>{window.scrollTo(0,document.body.scrollHeight)},l=()=>{o&&o.disconnect(),console.log('Stopped checking for new content.')},c=(e,n)=>{e.forEach(e=>{if(e.isIntersecting){i=Date.now();const n=Date.now();if(n-a>=t||n-i>=1e4)return void l();r(),t=document.querySelector('body > *:last-child'),o.observe(t)}})},s=()=>{t&&(o=new IntersectionObserver(c),o.observe(t))},d=()=>{['load','error','abort'].forEach(e=>{window.addEventListener(e,()=>{const e=document.querySelector('body > *:last-child');e!==t&&(i=Date.now(),t=e,o.observe(t))})})},u=()=>{r(),t=document.querySelector('body > *:last-child'),s(),d()};u(),setTimeout(l,n)});"###
931    );
932
933    s
934}
935
/// Execution scripts to run on the page when using chrome by url, in hash-map form:
/// each key maps to the script source for that entry.
pub type ExecutionScriptsMap = hashbrown::HashMap<String, String>;
/// Automation scripts to run on the page when using chrome by url, in hash-map form:
/// each key maps to the ordered list of automation steps for that entry.
pub type AutomationScriptsMap = hashbrown::HashMap<String, Vec<WebAutomation>>;

/// Execution scripts to run on the page when using chrome by url, stored in a [`Trie`]
/// so a script can be looked up with `search`.
pub type ExecutionScripts = Trie<String>;
/// Automation scripts to run on the page when using chrome by url, stored in a [`Trie`]
/// so a step list can be looked up with `search`.
pub type AutomationScripts = Trie<Vec<WebAutomation>>;
945
#[derive(Debug, Clone, Default, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
/// Chrome request interception configurations.
///
/// Note that the derived `Default` leaves every flag `false`; the constructors on this
/// type are what switch the blocking flags on.
pub struct RequestInterceptConfiguration {
    /// Request interception enabled?
    pub enabled: bool,
    /// Block visuals. The constructors enable this by default. This will prevent Prefetch, Ping, and some javascript from rendering.
    pub block_visuals: bool,
    /// Block stylesheets.
    pub block_stylesheets: bool,
    /// Block javascript, only allowing critical framework or lib based javascript to render.
    pub block_javascript: bool,
    /// Block analytics.
    pub block_analytics: bool,
    /// Block ads. Requires the `adblock` feature flag.
    pub block_ads: bool,
    /// Intercept Manager
    pub intercept_manager: NetworkInterceptManager,
    /// Whitelist patterns.
    pub whitelist_patterns: Option<Vec<String>>,
    /// Blacklist patterns.
    pub blacklist_patterns: Option<Vec<String>>,
}
969
970impl RequestInterceptConfiguration {
971    /// Setup a new intercept config
972    pub fn new(enabled: bool) -> RequestInterceptConfiguration {
973        RequestInterceptConfiguration {
974            enabled,
975            block_javascript: false,
976            block_visuals: true,
977            block_analytics: true,
978            block_stylesheets: true,
979            block_ads: true,
980            ..Default::default()
981        }
982    }
983    /// Setup a new intercept config with a custom intercept manager.
984    pub fn new_manager(
985        enabled: bool,
986        url: &Option<Box<url::Url>>,
987    ) -> RequestInterceptConfiguration {
988        RequestInterceptConfiguration {
989            enabled,
990            block_javascript: false,
991            block_visuals: true,
992            block_analytics: true,
993            block_stylesheets: true,
994            intercept_manager: NetworkInterceptManager::new(url),
995            ..Default::default()
996        }
997    }
998
999    /// Setup the network request manager type.
1000    pub fn setup_intercept_manager(&mut self, url: &Option<Box<url::Url>>) {
1001        self.intercept_manager = NetworkInterceptManager::new(url);
1002    }
1003
1004    /// Set the whitelist patterns.
1005    pub fn set_whitelist_patterns(&mut self, whitelist_patterns: Option<Vec<String>>) {
1006        self.whitelist_patterns = whitelist_patterns;
1007    }
1008
1009    /// Set the blacklist patterns.
1010    pub fn set_blacklist_patterns(&mut self, blacklist_patterns: Option<Vec<String>>) {
1011        self.blacklist_patterns = blacklist_patterns;
1012    }
1013
1014    /// Block all request besides html and the important stuff.
1015    pub fn block_all(&mut self) -> &Self {
1016        self.block_javascript = true;
1017        self.block_analytics = true;
1018        self.block_stylesheets = true;
1019        self.block_visuals = true;
1020        self.block_ads = true;
1021        self
1022    }
1023}
1024
1025/// Convert ExecutionScripts to Trie.
1026pub fn convert_to_trie_execution_scripts(
1027    input: &Option<ExecutionScriptsMap>,
1028) -> Option<Trie<String>> {
1029    match input {
1030        Some(ref scripts) => {
1031            let mut trie = Trie::new();
1032            for (path, script) in scripts {
1033                trie.insert(path, script.clone());
1034            }
1035            Some(trie)
1036        }
1037        None => None,
1038    }
1039}
1040
1041/// Convert AutomationScripts to Trie.
1042pub fn convert_to_trie_automation_scripts(
1043    input: &Option<AutomationScriptsMap>,
1044) -> Option<Trie<Vec<WebAutomation>>> {
1045    match input {
1046        Some(ref scripts) => {
1047            let mut trie = Trie::new();
1048            for (path, script_list) in scripts {
1049                trie.insert(path, script_list.clone());
1050            }
1051            Some(trie)
1052        }
1053        None => None,
1054    }
1055}
1056
/// Eval execution scripts.
///
/// Looks up the script registered for `target_url`; when there is none and the trie has
/// `match_all` set, the script stored on the trie root is used instead. The chosen
/// script is evaluated on `page` and its result is ignored.
#[cfg(feature = "chrome")]
pub async fn eval_execution_scripts(
    page: &chromiumoxide::Page,
    target_url: &str,
    execution_scripts: &Option<ExecutionScripts>,
) {
    let matched = execution_scripts.as_ref().and_then(|trie| {
        trie.search(target_url).or_else(|| {
            if trie.match_all {
                trie.root.value.as_ref()
            } else {
                None
            }
        })
    });

    if let Some(source) = matched {
        let _ = page.evaluate(source.as_str()).await;
    }
}
1074
/// Run automation scripts.
///
/// Picks the step list registered for `target_url`, falling back to the list stored on
/// the trie root when `match_all` is set, and runs the steps in order. Each step is
/// bounded by a 60 second timeout. A `ValidateChain` step stops the chain when the
/// previous step did not succeed; a step that timed out counts as not succeeded.
#[cfg(feature = "chrome")]
pub async fn eval_automation_scripts(
    page: &chromiumoxide::Page,
    target_url: &str,
    automation_scripts: &Option<AutomationScripts>,
) {
    // Resolve the step list once so both lookup paths share a single run loop.
    let steps = automation_scripts.as_ref().and_then(|script_map| {
        script_map.search(target_url).or_else(|| {
            if script_map.match_all {
                script_map.root.value.as_ref()
            } else {
                None
            }
        })
    });

    if let Some(scripts) = steps {
        let mut valid = false;

        for script in scripts {
            if script == &WebAutomation::ValidateChain && !valid {
                break;
            }

            match tokio::time::timeout(tokio::time::Duration::from_secs(60), script.run(page))
                .await
            {
                Ok(next) => valid = next,
                Err(elapsed) => {
                    // A timed-out step did not succeed; do not let an earlier success leak
                    // through to a following `ValidateChain`.
                    valid = false;
                    log::warn!("Script execution timed out for: {target_url} - {elapsed}")
                }
            }
        }
    }
}
1123
#[cfg(feature = "chrome")]
/// Default `Symbol.for` key under which the observer handle is stored on `globalThis`.
/// The same literal is embedded in `INSTALL_OBSERVER_JS` and `DRAIN_CHANGES_JS`, where
/// it can be swapped for another key with a string replace.
pub const OBSERVER_SYMBOL_KEY: &str = "obs:3f2d6c07-5c01-4e2e-9ab0-8a8b9a7b2f4c";
/// Installs a MutationObserver + URL change hooks and a drain function.
///
/// The script is idempotent per key: when a handle already exists under the symbol it is
/// returned as is. Otherwise it records into a queue capped at 100 entries:
/// - DOM mutations (child list and attributes, with old attribute values) observed on
///   `document.documentElement`,
/// - URL changes, by wrapping `history.pushState`/`replaceState` and listening for
///   `popstate`/`hashchange`,
/// - `input`/`change` events on `INPUT` and `TEXTAREA` elements, and `click` events.
///
/// Every entry carries a timestamp and a snapshot of url, title, viewport and scroll
/// position. The returned handle exposes `drain()`, which returns and clears the queue.
#[cfg(feature = "chrome")]
pub const INSTALL_OBSERVER_JS: &str = r##"(function(){const K=Symbol.for("obs:3f2d6c07-5c01-4e2e-9ab0-8a8b9a7b2f4c");if(globalThis[K])return globalThis[K];const Q=[],cap=100,cssPath=e=>{if(!e||1!==e.nodeType)return "#text";const t=[];for(;e&&1===e.nodeType&&e!==document.documentElement;){let n=e.nodeName.toLowerCase();if(e.id){n+="#"+e.id,t.unshift(n);break}let r=e,o=1;for(;(r=r.previousElementSibling)!=null;)if(r.nodeName===e.nodeName)o++;t.unshift(n+":nth-of-type("+o+")"),e=e.parentElement}return t.length?t.join(">"):"html"},state=()=>({url:location.href,title:document.title,viewport:{w:innerWidth,h:innerHeight,dpr:devicePixelRatio||1},scroll:{x:pageXOffset||0,y:pageYOffset||0}}),serialize=m=>"childList"===m.type?{type:"childList",target:cssPath(m.target),added:Array.from(m.addedNodes||[]).slice(0,3).map(n=>1===n.nodeType?n.outerHTML:"#text"),removed:Array.from(m.removedNodes||[]).slice(0,3).map(n=>1===n.nodeType?n.outerHTML:"#text" )}:"attributes"===m.type?{type:"attributes",target:cssPath(m.target),attribute:m.attributeName||"",old:m.oldValue??null,new:m.attributeName?m.target.getAttribute(m.attributeName):null}:{type:m.type,target:cssPath(m.target)},push=e=>{Q.push({t:Date.now(),state:state(),mutations:e?e.map(serialize):[]}),Q.length>cap&&Q.splice(0,Q.length-cap)};new MutationObserver(l=>push(Array.from(l))).observe(document.documentElement,{subtree:!0,childList:!0,attributes:!0,characterData:!1,attributeOldValue:!0});const notify=()=>push(null);["pushState","replaceState"].forEach(k=>{const o=history[k];history[k]=function(){const r=o.apply(this,arguments);return dispatchEvent(new Event("locationchange")),r}});addEventListener("popstate",()=>dispatchEvent(new Event("locationchange")));addEventListener("hashchange",()=>dispatchEvent(new Event("locationchange")));addEventListener("locationchange",notify,{passive:!0});const emitVal=el=>{const 
now=el&&null!=el.value?""+el.value:null,old=Object.prototype.hasOwnProperty.call(el,"__oldVal")?el.__oldVal:null;el.__oldVal=now,Q.push({t:Date.now(),state:state(),action:"input",target:cssPath(el),old:old,new:now}),Q.length>cap&&Q.splice(0,Q.length-cap)};addEventListener("input",e=>{const t=e.target;if(t&&("INPUT"===t.tagName||"TEXTAREA"===t.tagName))emitVal(t)},{capture:!0,passive:!0});addEventListener("change",e=>{const t=e.target;if(t&&("INPUT"===t.tagName||"TEXTAREA"===t.tagName))emitVal(t)},{capture:!0});addEventListener("click",e=>{const el=e.target instanceof Element?e.target:null;Q.push({t:Date.now(),state:state(),action:"click",target:el?cssPath(el):null,meta:el?{tag:el.tagName,id:el.id||null,class:el.className||null,href:el.getAttribute("href")||null,text:(el.textContent||"").trim().slice(0,120)}:null}),Q.length>cap&&Q.splice(0,Q.length-cap)},{capture:!0});const handle={drain:function(){const out=Q.slice();return Q.length=0,out}};return globalThis[K]=handle})()"##;
1130
/// Drains the recorded changes as an array; returns [] if not installed.
///
/// The script looks up the handle stored on `globalThis` under the `Symbol.for` key and
/// calls its `drain()` method when present.
#[cfg(feature = "chrome")]
pub const DRAIN_CHANGES_JS: &str = r##"(function(){const h=globalThis[Symbol.for("obs:3f2d6c07-5c01-4e2e-9ab0-8a8b9a7b2f4c")];return h&&h.drain?h.drain():[]})()"##;
1134
#[cfg(feature = "chrome")]
/// Generate a random Symbol key for this session/run.
///
/// The key is `obs:` followed by 24 characters drawn from ASCII letters and digits.
fn gen_symbol_key() -> String {
    const ALPH: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
    const PREFIX: &str = "obs:";
    const SUFFIX_LEN: usize = 24;

    let mut key = String::with_capacity(PREFIX.len() + SUFFIX_LEN);
    key.push_str(PREFIX);
    key.extend((0..SUFFIX_LEN).map(|_| ALPH[fastrand::usize(..ALPH.len())] as char));
    key
}
1146
#[cfg(feature = "chrome")]
/// Run automation scripts with state tracking.
///
/// Picks the step list registered for `target_url` (falling back to the list on the trie
/// root when `match_all` is set), installs the change observer on `page` under a freshly
/// generated symbol key and runs the steps in order, each bounded by a 60 second timeout.
///
/// After every step, whether it finished or timed out, the recorded changes are drained
/// and a screenshot is taken; the outcome is pushed onto `automation_data`. A
/// `ValidateChain` step stops the chain when the previous step did not succeed; a step
/// that timed out counts as not succeeded. When no step list applies, the page is left
/// untouched and nothing is pushed.
pub async fn eval_automation_scripts_tracking(
    page: &chromiumoxide::Page,
    target_url: &str,
    automation_scripts: &Option<AutomationScripts>,
    automation_data: &mut Vec<crate::page::AutomationResults>,
) {
    use base64::{engine::general_purpose::STANDARD, Engine};
    use chromiumoxide::cdp::js_protocol::runtime::EvaluateParams;

    /// Evaluation params for running a script without requesting its value.
    fn eval_expr(expr: &str) -> EvaluateParams {
        EvaluateParams::builder()
            .expression(expr)
            .eval_as_function_fallback(true)
            .build()
            .expect("eval_expr build failed")
    }

    /// Evaluation params that return the script result by value.
    fn eval_value(expr: &str) -> EvaluateParams {
        EvaluateParams::builder()
            .expression(expr)
            .return_by_value(true)
            .build()
            .expect("eval_value build failed")
    }

    /// The generic screenshot params.
    fn ss_params() -> chromiumoxide::page::ScreenshotParams {
        chromiumoxide::page::ScreenshotParams::builder()
            .format(chromiumoxide::cdp::browser_protocol::page::CaptureScreenshotFormat::Png)
            .full_page(true)
            .quality(45)
            .omit_background(false)
            .build()
    }

    // Helper to drain recorded changes as a single JSON value
    async fn drain_changes_json(
        page: &chromiumoxide::Page,
        drain_eval: &str,
    ) -> Option<serde_json::Value> {
        page.evaluate(eval_value(drain_eval))
            .await
            .ok()
            .and_then(|eval| eval.value().cloned())
    }

    let mut run_scripts: Option<&Vec<WebAutomation>> = None;

    if let Some(script_map) = automation_scripts {
        if let Some(scripts) = script_map.search(target_url) {
            run_scripts = Some(scripts);
        } else if script_map.match_all {
            if let Some(scripts) = script_map.root.value.as_ref() {
                run_scripts = Some(scripts);
            }
        }
    }

    // Nothing to run: skip instrumenting the page altogether.
    let scripts = match run_scripts {
        Some(scripts) => scripts,
        None => return,
    };

    let symbol = gen_symbol_key();

    // Swap the default key for the per-run one; it occurs once in each script.
    let install = INSTALL_OBSERVER_JS.replacen(OBSERVER_SYMBOL_KEY, &symbol, 1);
    let drain = DRAIN_CHANGES_JS.replacen(OBSERVER_SYMBOL_KEY, &symbol, 1);

    let _ = page.evaluate(eval_expr(&install)).await;
    // Discard anything recorded while installing so the first step starts clean.
    let _ = page.evaluate(eval_value(&drain)).await;

    let mut valid = false;

    for (idx, script) in scripts.iter().enumerate() {
        if script == &WebAutomation::ValidateChain && !valid {
            break;
        }

        let step_name = format!("step_{}_{}", idx + 1, script.label());

        let step_result =
            tokio::time::timeout(tokio::time::Duration::from_secs(60), script.run(page)).await;

        let mut automation_results = crate::page::AutomationResults::default();
        automation_results.input = format!("automation:{} url:{}", step_name, target_url);

        match step_result {
            Ok(next) => valid = next,
            Err(elapsed) => {
                // A timed-out step did not succeed; do not let an earlier success leak
                // through to a following `ValidateChain`.
                valid = false;
                automation_results.error = Some(format!(
                    "Script execution timed out for: {} - {}",
                    target_url, elapsed
                ));
                log::warn!("Script execution timed out for: {target_url} - {elapsed}");
            }
        }

        // Capture the page state after the step regardless of how it ended.
        if let Some(changes_json) = drain_changes_json(page, &drain).await {
            automation_results.content_output = changes_json;
        }
        if let Ok(ss) = page.screenshot(ss_params()).await {
            automation_results.screenshot_output = Some(STANDARD.encode(&ss));
        }

        automation_data.push(automation_results);
    }
}
1260}