1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
use html_query_ast::Action;
use html_query_ast::Expression;
use markup5ever::{LocalName, Namespace, QualName};
use scraper::{ElementRef, Html};
use serde_json::{Map, Value};
use std::collections::HashMap;

use thiserror::Error;

#[derive(Error, Debug)]
pub enum ExpressionError {
    #[error("Selector `{0} returned no results")]
    EmptySelector(String),

    #[error("Unexpected empty root node")]
    EmptyRoot,
}

/// Returns `input` with leading and trailing whitespace removed.
///
/// Interior whitespace is left untouched.
fn trim_whitespace(input: String) -> String {
    let trimmed: &str = input.trim();
    trimmed.to_owned()
}

fn handle_expression(
    roots: &[ElementRef],
    rhs: &Expression,
    lhs: &Option<Box<Action>>,
) -> Result<Value, ExpressionError> {
    return match rhs {
        Expression::Selector(selector, original_selector) => {
            let first_root = roots.first().ok_or(ExpressionError::EmptyRoot)?;
            let new_roots: Vec<_> = first_root.select(selector).collect();
            let first_new_root = new_roots
                .first()
                .ok_or_else(|| ExpressionError::EmptySelector(original_selector.clone()))?;
            match lhs {
                None => Ok(Value::String(trim_whitespace(
                    first_new_root.text().collect(),
                ))),
                Some(lhs) => Ok(convert_to_output(lhs, &new_roots)),
            }
        }
        Expression::Attribute(attr) => {
            let first_root = roots.first().ok_or(ExpressionError::EmptyRoot)?;
            Ok(first_root
                .value()
                .attrs
                .get(&QualName::new(
                    None,
                    Namespace::from(""),
                    LocalName::from(attr.as_str()),
                ))
                .map_or(Value::Null, |v| {
                    Value::String(trim_whitespace(v.to_string()))
                }))
        }
        Expression::Text => {
            let first_root = roots.first().ok_or(ExpressionError::EmptyRoot)?;
            Ok(Value::String(trim_whitespace(first_root.text().collect())))
        }
        Expression::Parent => {
            let first_root = roots.first().ok_or(ExpressionError::EmptyRoot)?;
            let parent_root = ElementRef::wrap(first_root.parent().unwrap()).unwrap();
            match lhs {
                None => handle_expression(&[parent_root], &Expression::Text, &None),
                Some(lhs) => Ok(convert_to_output(lhs, &vec![parent_root])),
            }
        }
        Expression::Sibling(idx) => {
            let first_root = roots.first().ok_or(ExpressionError::EmptyRoot)?;
            let mut next_sibling_elements = first_root
                .next_siblings()
                .filter(|s| s.value().is_element());
            let chosen_sibling =
                ElementRef::wrap(next_sibling_elements.nth(*idx - 1).unwrap()).unwrap();
            match lhs {
                None => handle_expression(&[chosen_sibling], &Expression::Text, &None),
                Some(lhs) => Ok(convert_to_output(lhs, &vec![chosen_sibling])),
            }
        }
    };
}

/// Recursively converts an action into a JSON value, evaluated against `roots`.
///
/// * `Action::ForEachChild` produces an array with one object per root; each
///   object field is evaluated with that single root.
/// * `Action::ForEachChildArray` produces an array with one value per root.
/// * `Action::Child` produces a single object whose fields are all evaluated
///   against the full `roots` slice.
/// * `Action::Expression` evaluates the expression; any evaluation error is
///   turned into `Value::Null` rather than propagated.
fn convert_to_output(item: &Action, roots: &[ElementRef]) -> Value {
    match item {
        Action::ForEachChild(hashmap) => Value::Array(
            roots
                .iter()
                .map(|root| {
                    // Borrow the root as a one-element slice; no allocation needed.
                    let single_root = std::slice::from_ref(root);
                    let fields = hashmap
                        .iter()
                        .map(|(key, value)| (key.clone(), convert_to_output(value, single_root)))
                        .collect::<Map<_, _>>();
                    Value::Object(fields)
                })
                .collect(),
        ),
        Action::ForEachChildArray(action) => Value::Array(
            roots
                .iter()
                .map(|root| convert_to_output(action, std::slice::from_ref(root)))
                .collect(),
        ),
        Action::Child(hashmap) => {
            let fields = hashmap
                .iter()
                .map(|(key, value)| (key.clone(), convert_to_output(value, roots)))
                .collect::<Map<_, _>>();
            Value::Object(fields)
        }
        // A failed expression yields `null` in the output instead of aborting.
        Action::Expression(rhs, lhs) => handle_expression(roots, rhs, lhs).unwrap_or(Value::Null),
    }
}

pub fn extract(input: &str, actions: &HashMap<&str, Action>) -> Value {
    let fragment = Html::parse_fragment(input);
    let root = fragment.root_element();
    let hashmap = actions
        .iter()
        .map(|(key, value)| (key.to_string(), convert_to_output(value, &vec![root])))
        .collect();
    Value::Object(hashmap)
}