1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
use crate::{
    action::{ScrapeAction, ScrapeActionResult},
    pipeline::{ScrapeContext, ScrapeError, ScrapePipeline},
    value::Value,
};
use async_trait::async_trait;
use fantoccini::{elements::Element, Locator};
use log::error;
use serde::{Deserialize, Serialize};
use std::fmt::{Display, Formatter, Result as FormatResult};

/// Kind of element lookup a query action can perform.
///
/// Each variant is translated by `Selector::get_locator` into the fantoccini
/// `Locator` variant of the same name.
#[derive(Serialize, Deserialize, Debug, Copy, Clone)]
pub enum Selector {
    /// Interpret the query as a CSS selector (`Locator::Css`).
    Css,
    /// Interpret the query as an element id (`Locator::Id`).
    Id,
    /// Interpret the query as link text (`Locator::LinkText`).
    LinkText,
}

impl Selector {
    /// Builds the fantoccini [`Locator`] that applies this selector kind to `query`.
    ///
    /// The returned locator borrows only `query`: its lifetime is deliberately
    /// not tied to the borrow of `self`, so a locator may outlive a temporary
    /// or short-lived `Selector` value.
    ///
    /// # Parameters
    /// - `query`: the selector text (CSS selector, element id or link text,
    ///   depending on the variant).
    ///
    /// # Returns
    /// The `Locator` variant with the same name as this selector, wrapping `query`.
    pub fn get_locator<'a>(&self, query: &'a str) -> Locator<'a> {
        // `Selector` is `Copy`, so matching on the dereferenced value is free.
        match *self {
            Selector::Css => Locator::Css(query),
            Selector::Id => Locator::Id(query),
            Selector::LinkText => Locator::LinkText(query),
        }
    }
}

/// Where a `QueryElement` action searches for its elements.
#[derive(Serialize, Deserialize, Debug, Copy, Clone)]
pub enum ElementScope {
    /// Search through the context's WebDriver client (`context.client`).
    Global,
    /// Search among the children of the context's scoped element.
    Scoped,
    /// Search among the children of the context's current element.
    Current,
}

/// Scrape action that looks up elements and makes them available to the
/// rest of the pipeline through the scrape context.
#[derive(Debug, Serialize, Deserialize)]
pub struct QueryElement {
    // How the resolved query text is interpreted (CSS, id or link text).
    selector: Selector,
    // Source of the query text; resolved against the context when the action runs.
    query: Value,
    // Where the lookup is performed (client, scoped element or current element).
    scope: ElementScope,
    // Optional nested pipeline executed once for every matched element.
    for_each: Option<ScrapePipeline>,
}

impl QueryElement {
    /// Creates a query action for the given selector kind, query value and
    /// search scope. No nested pipeline is attached.
    pub fn new(selector: Selector, query: Value, scope: ElementScope) -> Self {
        Self {
            for_each: None,
            selector,
            query,
            scope,
        }
    }

    /// Shorthand for [`QueryElement::new`] with [`ElementScope::Global`].
    pub fn global(selector: Selector, query: Value) -> Self {
        Self::new(selector, query, ElementScope::Global)
    }

    /// Shorthand for [`QueryElement::new`] with [`ElementScope::Scoped`].
    pub fn scoped(selector: Selector, query: Value) -> Self {
        Self::new(selector, query, ElementScope::Scoped)
    }

    /// Shorthand for [`QueryElement::new`] with [`ElementScope::Current`].
    pub fn current(selector: Selector, query: Value) -> Self {
        Self::new(selector, query, ElementScope::Current)
    }

    /// Attaches a nested pipeline that is executed for every matched element,
    /// replacing any pipeline attached earlier, and returns the updated action.
    pub fn for_each(self, pipeline: ScrapePipeline) -> Self {
        Self {
            for_each: Some(pipeline),
            ..self
        }
    }
}

impl Display for QueryElement {
    fn fmt(&self, fmt: &mut Formatter<'_>) -> FormatResult {
        write!(
            fmt,
            "query element with {:?} selector in {:?} scope with the query value from {}",
            self.selector, self.scope, self.query
        )
    }
}

#[async_trait]
#[typetag::serde]
impl ScrapeAction for QueryElement {
    /// Resolves the query value, looks up the matching elements in the
    /// configured scope and publishes them through the context.
    ///
    /// Without a `for_each` pipeline the last matched element becomes the
    /// context's current element. With one, the pipeline runs once per match
    /// with that match installed as both the scoped and the current element;
    /// afterwards the previous scoped element is restored and the current
    /// element is the last match.
    ///
    /// # Errors
    /// - `ScrapeError::MissingQuery` when the query value resolves to nothing.
    /// - `ScrapeError::MissingElement` when a scoped/current lookup has no
    ///   parent element in the context.
    /// - `ScrapeError::ElementQueryEmptyResult` when no element matches.
    /// - Errors from resolving the value or from the element lookup itself
    ///   are propagated.
    ///
    /// Failures of the nested pipeline are logged and do not abort the action.
    async fn execute(&self, context: &mut ScrapeContext) -> ScrapeActionResult {
        let resolved = self.query.resolve(&mut *context).await?;
        let query = resolved.ok_or(ScrapeError::MissingQuery)?;
        let locator = self.selector.get_locator(&query);

        let mut found = match self.scope {
            ElementScope::Global => context.client.find_all(locator).await?,
            ElementScope::Scoped => {
                find_child_elements(&mut context.scoped_element, locator).await?
            }
            ElementScope::Current => {
                find_child_elements(&mut context.current_element, locator).await?
            }
        };
        if found.is_empty() {
            return Err(ScrapeError::ElementQueryEmptyResult);
        }

        // Simple case first: no nested pipeline, just expose the last match.
        let pipeline = match self.for_each {
            Some(ref pipeline) => pipeline,
            None => {
                context.current_element = found.pop();
                return Ok(());
            }
        };

        // Remember the enclosing scope so it can be put back once iteration is done.
        let outer_scope = context.scoped_element.take();

        // `found` is known to be non-empty here, so the subtraction cannot underflow.
        let last_index = found.len() - 1;
        for (index, element) in found.into_iter().enumerate() {
            context.scoped_element = Some(element.clone());
            context.current_element = Some(element.clone());

            // Best effort: a failing iteration is reported but does not stop the others.
            if let Err(error) = pipeline.execute(&mut *context).await {
                error!("Nested pipeline execution error: {}", error);
            }

            // The nested pipeline may have moved the current element; after the
            // final iteration it is reset to the last match.
            if index == last_index {
                context.current_element = Some(element);
            }
        }

        context.scoped_element = outer_scope;
        Ok(())
    }
}

/// Finds all descendants of `element` that match `locator`.
///
/// # Errors
/// - `ScrapeError::MissingElement` when `element` is `None`.
/// - `ScrapeError::WebdriverCommandError` wrapping any error reported by the
///   element lookup.
async fn find_child_elements<'a>(
    element: &mut Option<Element>,
    locator: Locator<'a>,
) -> Result<Vec<Element>, ScrapeError> {
    match element.as_mut() {
        None => Err(ScrapeError::MissingElement),
        Some(parent) => parent
            .find_all(locator)
            .await
            .map_err(ScrapeError::WebdriverCommandError),
    }
}