scrapman/action/
query_element.rs

1use crate::{
2    action::{ScrapeAction, ScrapeActionResult},
3    pipeline::{ScrapeContext, ScrapeError, ScrapePipeline},
4    value::Value,
5};
6use async_trait::async_trait;
7use fantoccini::{elements::Element, Locator};
8use log::error;
9use serde::{Deserialize, Serialize};
10use std::fmt::{Display, Formatter, Result as FormatResult};
11
12#[derive(Serialize, Deserialize, Debug, Copy, Clone)]
13pub enum Selector {
14    Css,
15    Id,
16    LinkText,
17}
18
19impl Selector {
20    pub fn get_locator<'a>(&'a self, query: &'a str) -> Locator<'a> {
21        match self {
22            Selector::Css => Locator::Css(query),
23            Selector::Id => Locator::Id(query),
24            Selector::LinkText => Locator::LinkText(query),
25        }
26    }
27}
28
29#[derive(Serialize, Deserialize, Debug, Copy, Clone)]
30pub enum ElementScope {
31    Global,
32    Scoped,
33    Current,
34}
35
36#[derive(Debug, Serialize, Deserialize)]
37pub struct QueryElement {
38    selector: Selector,
39    query: Value,
40    scope: ElementScope,
41    for_each: Option<ScrapePipeline>,
42}
43
44impl QueryElement {
45    pub fn new(selector: Selector, query: Value, scope: ElementScope) -> Self {
46        QueryElement {
47            selector,
48            query,
49            scope,
50            for_each: None,
51        }
52    }
53
54    pub fn global(selector: Selector, query: Value) -> Self {
55        QueryElement::new(selector, query, ElementScope::Global)
56    }
57
58    pub fn scoped(selector: Selector, query: Value) -> Self {
59        QueryElement::new(selector, query, ElementScope::Scoped)
60    }
61
62    pub fn current(selector: Selector, query: Value) -> Self {
63        QueryElement::new(selector, query, ElementScope::Current)
64    }
65
66    pub fn for_each(mut self, pipeline: ScrapePipeline) -> Self {
67        self.for_each = Some(pipeline);
68        self
69    }
70}
71
72impl Display for QueryElement {
73    fn fmt(&self, fmt: &mut Formatter<'_>) -> FormatResult {
74        write!(
75            fmt,
76            "query element with {:?} selector in {:?} scope with the query value from {}",
77            self.selector, self.scope, self.query
78        )
79    }
80}
81
82#[async_trait]
83#[typetag::serde]
84impl ScrapeAction for QueryElement {
85    async fn execute(&self, mut context: &mut ScrapeContext) -> ScrapeActionResult {
86        let query = self
87            .query
88            .resolve(&mut context)
89            .await?
90            .ok_or(ScrapeError::MissingQuery)?;
91
92        let locator = self.selector.get_locator(&query);
93        let mut elements = match self.scope {
94            ElementScope::Global => context.client.find_all(locator).await?,
95            ElementScope::Scoped => find_child_elements(&mut context.scoped_element, locator).await?,
96            ElementScope::Current => find_child_elements(&mut context.current_element, locator).await?,
97        };
98
99        if elements.is_empty() {
100            return Err(ScrapeError::ElementQueryEmptyResult);
101        }
102
103        if let Some(ref pipeline) = self.for_each {
104            // Store current scoped element
105            let current_scoped = context.scoped_element.take();
106
107            let count = elements.len();
108            for (idx, element) in elements.into_iter().enumerate() {
109                context.scoped_element = Some(element.clone());
110                context.current_element = Some(element.clone());
111
112                // Nested pipeline execution launch
113                if let Err(error) = pipeline.execute(&mut context).await {
114                    error!("Nested pipeline execution error: {}", error);
115                }
116
117                // Current element set to the last element in the sequence
118                if idx == count - 1 {
119                    context.current_element = Some(element);
120                }
121            }
122
123            // Restore original scoped element
124            context.scoped_element = current_scoped;
125        } else {
126            context.current_element = elements.pop();
127        }
128
129        Ok(())
130    }
131}
132
133async fn find_child_elements<'a>(
134    element: &mut Option<Element>,
135    locator: Locator<'a>,
136) -> Result<Vec<Element>, ScrapeError> {
137    if let Some(element) = element {
138        element
139            .find_all(locator)
140            .await
141            .map_err(ScrapeError::WebdriverCommandError)
142    } else {
143        Err(ScrapeError::MissingElement)
144    }
145}