nu_plugin_selector/
selector.rs1use crate::Table;
2use nu_protocol::{value::StringExt, Primitive, TaggedDictBuilder, UntaggedValue, Value};
3use nu_source::Tag;
4use scraper::{Html, Selector as ScraperSelector};
5
/// Options describing a single selector query to run against an HTML document.
pub struct Selector {
    /// CSS selector string used for text, HTML and attribute queries.
    pub query: String,
    /// Tag carried with the selector; `Selector::new` initialises it to `Tag::unknown()`.
    // NOTE(review): not read anywhere in this file — presumably consumed by the plugin
    // front-end; confirm against the caller.
    pub tag: Tag,
    /// For plain queries (empty `attribute`): when `true`, each match is returned as its
    /// HTML; when `false`, as its concatenated text.
    pub as_html: bool,
    /// When non-empty, the value of this attribute is returned for each matched element
    /// (an empty string if the element lacks it) instead of its text or HTML.
    pub attribute: String,
    /// Selects table mode: if this value is anything other than a string, tables are
    /// extracted and this value supplies the column headers to look for. The default
    /// (an empty string) leaves table mode off.
    pub as_table: Value,
    /// When `true`, table extraction prints diagnostic information to stderr.
    pub inspect: bool,
}
14
15impl Selector {
16 pub fn new() -> Selector {
17 Selector {
18 query: String::new(),
19 tag: Tag::unknown(),
20 as_html: false,
21 attribute: String::new(),
22 as_table: Value::new(
23 UntaggedValue::Primitive(Primitive::String("".to_string())),
24 Tag::unknown(),
25 ),
26 inspect: false,
27 }
28 }
29}
30
31impl Default for Selector {
32 fn default() -> Self {
33 Self::new()
34 }
35}
36
37pub fn begin_selector_query(input_html: String, selector: &Selector) -> Vec<Value> {
38 if !selector.as_table.value.is_string() {
39 retrieve_tables(input_html.as_str(), &selector.as_table, selector.inspect)
40 } else {
41 match selector.attribute.is_empty() {
42 true => execute_selector_query(
43 input_html.as_str(),
44 selector.query.as_str(),
45 selector.as_html,
46 ),
47 false => execute_selector_query_with_attribute(
48 input_html.as_str(),
49 selector.query.as_str(),
50 selector.attribute.as_str(),
51 ),
52 }
53 }
54}
55
56pub fn retrieve_tables(input_string: &str, columns: &Value, inspect_mode: bool) -> Vec<Value> {
57 let html = input_string;
58 let mut cols = Vec::new();
59 if let UntaggedValue::Table(t) = &columns.value {
60 for x in t {
61 cols.push(x.convert_to_string());
62 }
63 }
64
65 if inspect_mode {
66 eprintln!("Passed in Column Headers = {:#?}", &cols,);
67 }
68
69 let tables = match Table::find_by_headers(html, &cols) {
70 Some(t) => {
71 if inspect_mode {
72 eprintln!("Table Found = {:#?}", &t);
73 }
74 t
75 }
76 None => vec![Table::empty()],
77 };
78 if tables.len() == 1 {
79 return retrieve_table(
80 tables
81 .into_iter()
82 .next()
83 .expect("This should never trigger"),
84 columns,
85 );
86 }
87 tables
88 .into_iter()
89 .map(move |table| {
90 UntaggedValue::Table(retrieve_table(table, columns)).into_value(Tag::unknown())
91 })
92 .collect()
93}
94
95fn retrieve_table(mut table: Table, columns: &Value) -> Vec<Value> {
96 let mut cols = Vec::new();
97 if let UntaggedValue::Table(t) = &columns.value {
98 for x in t {
99 cols.push(x.convert_to_string());
100 }
101 }
102
103 if cols.is_empty() && !table.headers().is_empty() {
104 for col in table.headers().keys() {
105 cols.push(col.to_string());
106 }
107 }
108
109 let mut table_out = Vec::new();
110 let mut at_least_one_row_filled = false;
114 if cols.is_empty() {
116 at_least_one_row_filled = true;
117 let table_with_no_empties: Vec<_> = table.iter().filter(|item| !item.is_empty()).collect();
118
119 for row in &table_with_no_empties {
120 let mut dict = TaggedDictBuilder::new(Tag::unknown());
121 for (counter, cell) in row.iter().enumerate() {
122 let col_name = format!("Column{}", counter);
123 dict.insert_value(
124 col_name,
125 UntaggedValue::Primitive(Primitive::String(cell.to_string()))
126 .into_value(Tag::unknown()),
127 );
128 }
129 table_out.push(dict.into_value());
130 }
131 } else {
132 for row in &table {
133 let mut dict = TaggedDictBuilder::new(Tag::unknown());
134 for col in &cols {
136 let key = col.to_string();
138 let val = row
139 .get(col)
140 .unwrap_or(&format!("Missing column: '{}'", &col))
141 .to_string();
142 if !at_least_one_row_filled && val != format!("Missing column: '{}'", &col) {
143 at_least_one_row_filled = true;
144 }
145 dict.insert_value(
146 key,
147 UntaggedValue::Primitive(Primitive::String(val)).into_value(Tag::unknown()),
148 );
149 }
150 table_out.push(dict.into_value());
151 }
152 }
153 if !at_least_one_row_filled {
154 let mut data2 = Vec::new();
155 for x in &table.data {
156 data2.push(x.join(", "));
157 }
158 table.data = vec![data2];
159 return retrieve_table(table, columns);
160 }
161 table_out
162}
163
164fn execute_selector_query_with_attribute(
165 input_string: &str,
166 query_string: &str,
167 attribute: &str,
168) -> Vec<Value> {
169 let doc = Html::parse_fragment(input_string);
170
171 doc.select(&css(query_string))
172 .map(|selection| {
173 selection
174 .value()
175 .attr(attribute)
176 .unwrap_or("")
177 .to_string()
178 .to_string_value_create_tag()
179 })
180 .collect()
181}
182
183fn execute_selector_query(input_string: &str, query_string: &str, as_html: bool) -> Vec<Value> {
184 let doc = Html::parse_fragment(input_string);
185
186 match as_html {
187 true => doc
188 .select(&css(query_string))
189 .map(|selection| selection.html().to_string_value_create_tag())
190 .collect(),
191 false => doc
192 .select(&css(query_string))
193 .map(|selection| {
194 selection
195 .text()
196 .fold("".to_string(), |acc, x| format!("{}{}", acc, x))
197 .to_string_value_create_tag()
198 })
199 .collect(),
200 }
201}
202
203pub fn css(selector: &str) -> ScraperSelector {
204 ScraperSelector::parse(selector).expect("this should never trigger")
205}
206
// Unit tests for the plain text selector query.
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal HTML fixture: an unordered list with three items.
    const SIMPLE_LIST: &str = r#"
 <ul>
 <li>Coffee</li>
 <li>Tea</li>
 <li>Milk</li>
 </ul>
"#;

    // A text query for `li:first-child` must produce at least one value.
    #[test]
    fn test_first_child_is_not_empty() {
        assert!(!execute_selector_query(SIMPLE_LIST, "li:first-child", false).is_empty())
    }

    // The same query must yield exactly the text of the first list item.
    #[test]
    fn test_first_child() {
        assert_eq!(
            vec!["Coffee".to_string().to_string_value_create_tag()],
            execute_selector_query(SIMPLE_LIST, "li:first-child", false)
        )
    }
}