1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
use std::error::Error;
use std::ops::Deref;
use lazy_static::lazy_static;
use regex::Regex;
use scraper;
use scraper::Html;
use scraper::Selector;
use scraper::ElementRef;
use crate::error::HnError;
pub mod comments;
pub mod listings;
pub use crate::parser::comments::CommentsParser;
pub use crate::parser::listings::ListingsParser;
/// Common interface for parsers that build a typed value from a parsed
/// HTML document.
pub trait HtmlParse {
/// The value produced by a successful parse.
type Item;
/// Builds a `Self::Item` from `html`.
///
/// This is an associated function (it takes no `self`), so implementors
/// are invoked as `Type::parse(&html)`.
///
/// # Errors
///
/// Returns a boxed error when the implementor cannot produce an item from
/// the document; the concrete error type is chosen by the implementor.
fn parse(html: &Html) -> Result<Self::Item, Box<dyn Error>>;
}
/// Walks `height` parent links up from `node` and returns the node reached,
/// wrapped as an element.
///
/// A `height` of `0` yields `node` itself.
///
/// Returns `None` when the walk runs out of parents before `height` steps
/// have been taken, or when the node reached is not an element (that is,
/// `ElementRef::wrap` rejects it).
fn ancestor<'a>(node: &'a ElementRef, height: u32) -> Option<ElementRef<'a>> {
    // `ElementRef` derefs to the underlying tree node handle, which is `Copy`,
    // so the walk can hold it by value instead of juggling references.
    let mut current = *Deref::deref(node);
    for _ in 0..height {
        // Bail out with `None` as soon as a parent is missing.
        current = current.parent()?;
    }
    ElementRef::wrap(current)
}
/// Appends text from the descendants of `node` that match `qs` onto `text`.
///
/// For every matching element, only the first text node it yields is used;
/// matches that yield no text are skipped. Each appended fragment is preceded
/// by a single `'\n'`.
fn append_more_text_nodes(node: &ElementRef, qs: &Selector, text: &mut String) {
    node.select(qs)
        // Keep only the leading text node of each match, dropping empty ones.
        .filter_map(|matched| matched.text().next())
        .for_each(|fragment| {
            text.push('\n');
            text.push_str(fragment);
        });
}
// Regex used by `extract_fnid`; `lazy_static!` compiles it on first use and
// then reuses the compiled value.
lazy_static! {
// Matches an `<input ...>` tag carrying a `value="..."` attribute and captures
// the attribute's contents (non-greedy, at least one character) as group 1.
// NOTE(review): the `.*` before `value=` is greedy, so if several `value="`
// attributes appear on the same line the last one is the one captured --
// confirm this is intended.
// The `unwrap` only panics if the constant pattern itself is invalid.
static ref FNID_REGEX: Regex = Regex::new(r#"<input.*value="(.+?)".*>"#).unwrap();
}
/// Extracts the `value` attribute contents from the HTML of `el`.
///
/// The element is serialized with `el.html()` and matched against
/// `FNID_REGEX`; the text of capture group 1 is returned as an owned
/// `String`.
///
/// # Errors
///
/// Returns `HnError::HtmlParsingError` (boxed) when the serialized element
/// does not match the pattern or capture group 1 is absent.
pub fn extract_fnid(el: &ElementRef) -> Result<String, Box<dyn Error>> {
    let markup = el.html();
    let groups = FNID_REGEX
        .captures(&markup)
        .ok_or(HnError::HtmlParsingError)?;
    let value = groups.get(1).ok_or(HnError::HtmlParsingError)?;
    Ok(value.as_str().to_string())
}