hacker_news/parser/
mod.rs1use std::error::Error;
2use std::ops::Deref;
3use lazy_static::lazy_static;
4use regex::Regex;
5use scraper;
6use scraper::Html;
7use scraper::Selector;
8use scraper::ElementRef;
9use crate::error::HnError;
10
11pub mod comments;
12pub mod listings;
13
14pub use crate::parser::comments::CommentsParser;
16pub use crate::parser::listings::ListingsParser;
17
18pub trait HtmlParse {
19 type Item;
20
21 fn parse(html: &Html) -> Result<Self::Item, Box<dyn Error>>;
22}
23
24fn ancestor<'a>(node: &'a ElementRef, height: u32) -> Option<ElementRef<'a>> {
26
27 let mut curr_node = Deref::deref(node);
32 #[allow(unused_assignments)]
33 let mut parent = curr_node.parent();
34 let mut i = 0;
35
36 while i < height {
37 parent = curr_node.parent();
38 curr_node = match parent {
39 Some(ref node_ref) => node_ref,
40 None => { return None; },
41 };
42 i += 1;
43 }
44
45 ElementRef::wrap(*curr_node)
46}
47
48fn append_more_text_nodes(node: &ElementRef, qs: &Selector, text: &mut String, ) {
50 for child in node.select(qs) {
51 match child.text().next() {
52 None => {
53 continue;
56 }
57 Some(more_text) => {
58 text.push('\n');
60 text.push_str(more_text);
61 },
62 }
63 }
64}
65
66lazy_static! {
67 static ref FNID_REGEX: Regex = Regex::new(r#"<input.*value="(.+?)".*>"#).unwrap();
68}
69
70pub fn extract_fnid(el: &ElementRef) -> Result<String, Box<dyn Error>> {
71 let text = el.html();
72 let captures = match FNID_REGEX.captures(&text) {
73 Some(captures) => captures,
74 None => {
75 return Err(Box::new(HnError::HtmlParsingError));
76 }
77 };
78 let fnid = match captures.get(1) {
79 Some(fnid) => {
80 fnid.as_str().to_string()
81 },
82 None => {
83 return Err(Box::new(HnError::HtmlParsingError));
84 }
85 };
86
87 Ok(fnid)
88}
89