nu_plugin_selector/
selector.rs

1use crate::Table;
2use nu_protocol::{value::StringExt, Primitive, TaggedDictBuilder, UntaggedValue, Value};
3use nu_source::Tag;
4use scraper::{Html, Selector as ScraperSelector};
5
6pub struct Selector {
7    pub query: String,
8    pub tag: Tag,
9    pub as_html: bool,
10    pub attribute: String,
11    pub as_table: Value,
12    pub inspect: bool,
13}
14
15impl Selector {
16    pub fn new() -> Selector {
17        Selector {
18            query: String::new(),
19            tag: Tag::unknown(),
20            as_html: false,
21            attribute: String::new(),
22            as_table: Value::new(
23                UntaggedValue::Primitive(Primitive::String("".to_string())),
24                Tag::unknown(),
25            ),
26            inspect: false,
27        }
28    }
29}
30
31impl Default for Selector {
32    fn default() -> Self {
33        Self::new()
34    }
35}
36
37pub fn begin_selector_query(input_html: String, selector: &Selector) -> Vec<Value> {
38    if !selector.as_table.value.is_string() {
39        retrieve_tables(input_html.as_str(), &selector.as_table, selector.inspect)
40    } else {
41        match selector.attribute.is_empty() {
42            true => execute_selector_query(
43                input_html.as_str(),
44                selector.query.as_str(),
45                selector.as_html,
46            ),
47            false => execute_selector_query_with_attribute(
48                input_html.as_str(),
49                selector.query.as_str(),
50                selector.attribute.as_str(),
51            ),
52        }
53    }
54}
55
56pub fn retrieve_tables(input_string: &str, columns: &Value, inspect_mode: bool) -> Vec<Value> {
57    let html = input_string;
58    let mut cols = Vec::new();
59    if let UntaggedValue::Table(t) = &columns.value {
60        for x in t {
61            cols.push(x.convert_to_string());
62        }
63    }
64
65    if inspect_mode {
66        eprintln!("Passed in Column Headers = {:#?}", &cols,);
67    }
68
69    let tables = match Table::find_by_headers(html, &cols) {
70        Some(t) => {
71            if inspect_mode {
72                eprintln!("Table Found = {:#?}", &t);
73            }
74            t
75        }
76        None => vec![Table::empty()],
77    };
78    if tables.len() == 1 {
79        return retrieve_table(
80            tables
81                .into_iter()
82                .next()
83                .expect("This should never trigger"),
84            columns,
85        );
86    }
87    tables
88        .into_iter()
89        .map(move |table| {
90            UntaggedValue::Table(retrieve_table(table, columns)).into_value(Tag::unknown())
91        })
92        .collect()
93}
94
95fn retrieve_table(mut table: Table, columns: &Value) -> Vec<Value> {
96    let mut cols = Vec::new();
97    if let UntaggedValue::Table(t) = &columns.value {
98        for x in t {
99            cols.push(x.convert_to_string());
100        }
101    }
102
103    if cols.is_empty() && !table.headers().is_empty() {
104        for col in table.headers().keys() {
105            cols.push(col.to_string());
106        }
107    }
108
109    let mut table_out = Vec::new();
110    // sometimes there are tables where the first column is the headers, kind of like
111    // a table has ben rotated ccw 90 degrees, in these cases all columns will be missing
112    // we keep track of this with this variable so we can deal with it later
113    let mut at_least_one_row_filled = false;
114    // if columns are still empty, let's just make a single column table with the data
115    if cols.is_empty() {
116        at_least_one_row_filled = true;
117        let table_with_no_empties: Vec<_> = table.iter().filter(|item| !item.is_empty()).collect();
118
119        for row in &table_with_no_empties {
120            let mut dict = TaggedDictBuilder::new(Tag::unknown());
121            for (counter, cell) in row.iter().enumerate() {
122                let col_name = format!("Column{}", counter);
123                dict.insert_value(
124                    col_name,
125                    UntaggedValue::Primitive(Primitive::String(cell.to_string()))
126                        .into_value(Tag::unknown()),
127                );
128            }
129            table_out.push(dict.into_value());
130        }
131    } else {
132        for row in &table {
133            let mut dict = TaggedDictBuilder::new(Tag::unknown());
134            // eprintln!("row={:?}", &row);
135            for col in &cols {
136                //eprintln!("col={:?}", &col);
137                let key = col.to_string();
138                let val = row
139                    .get(col)
140                    .unwrap_or(&format!("Missing column: '{}'", &col))
141                    .to_string();
142                if !at_least_one_row_filled && val != format!("Missing column: '{}'", &col) {
143                    at_least_one_row_filled = true;
144                }
145                dict.insert_value(
146                    key,
147                    UntaggedValue::Primitive(Primitive::String(val)).into_value(Tag::unknown()),
148                );
149            }
150            table_out.push(dict.into_value());
151        }
152    }
153    if !at_least_one_row_filled {
154        let mut data2 = Vec::new();
155        for x in &table.data {
156            data2.push(x.join(", "));
157        }
158        table.data = vec![data2];
159        return retrieve_table(table, columns);
160    }
161    table_out
162}
163
164fn execute_selector_query_with_attribute(
165    input_string: &str,
166    query_string: &str,
167    attribute: &str,
168) -> Vec<Value> {
169    let doc = Html::parse_fragment(input_string);
170
171    doc.select(&css(query_string))
172        .map(|selection| {
173            selection
174                .value()
175                .attr(attribute)
176                .unwrap_or("")
177                .to_string()
178                .to_string_value_create_tag()
179        })
180        .collect()
181}
182
183fn execute_selector_query(input_string: &str, query_string: &str, as_html: bool) -> Vec<Value> {
184    let doc = Html::parse_fragment(input_string);
185
186    match as_html {
187        true => doc
188            .select(&css(query_string))
189            .map(|selection| selection.html().to_string_value_create_tag())
190            .collect(),
191        false => doc
192            .select(&css(query_string))
193            .map(|selection| {
194                selection
195                    .text()
196                    .fold("".to_string(), |acc, x| format!("{}{}", acc, x))
197                    .to_string_value_create_tag()
198            })
199            .collect(),
200    }
201}
202
203pub fn css(selector: &str) -> ScraperSelector {
204    ScraperSelector::parse(selector).expect("this should never trigger")
205}
206
#[cfg(test)]
mod tests {
    use super::*;

    /// Three-item unordered list shared by the tests in this module.
    const SIMPLE_LIST: &str = r#"
    <ul>
        <li>Coffee</li>
        <li>Tea</li>
        <li>Milk</li>
    </ul>
"#;

    /// `li:first-child` must match at least one element of the fixture.
    #[test]
    fn test_first_child_is_not_empty() {
        let matches = execute_selector_query(SIMPLE_LIST, "li:first-child", false);
        assert!(!matches.is_empty())
    }

    /// `li:first-child` in text mode yields exactly the first item's text.
    #[test]
    fn test_first_child() {
        let expected = vec!["Coffee".to_string().to_string_value_create_tag()];
        let actual = execute_selector_query(SIMPLE_LIST, "li:first-child", false);
        assert_eq!(expected, actual)
    }
}