ahref/
parser.rs

1use regex::Regex;
2use tagparser;
3
/// Extracts hyperlinks from the `a` tags of an HTML document.
///
/// The parser owns a copy of the HTML text; see the `impl` block for the
/// operations that read it.
#[derive(Debug, Clone)]
pub struct Parser {
    /// Raw HTML text that the parsing methods scan.
    html: String,
}
7
8impl Parser {
9    pub fn new(html: String) -> Self {
10        Parser { html }
11    }
12
13    pub fn parse_tags(&mut self) -> Vec<String> {
14        tagparser::parse_tags(self.html.clone(), "a".to_string())
15    }
16
17    pub fn parse_links(&mut self) -> Vec<String> {
18        let mut links = Vec::new();
19
20        for tag in self.parse_tags() {
21            if self.create_regex().is_match(&tag) {
22                let url = self.convert_a_href_to_url(tag);
23                links.push(url.to_string());
24            }
25        }
26
27        links
28    }
29
30    fn convert_a_href_to_url(&mut self, tag_a: String) -> String {
31        let regex_link = self.create_regex();
32
33        let mut url = regex_link
34            .find(&tag_a)
35            .unwrap()
36            .as_str()
37            .replace("href=", "")
38            .replace("\'", "")
39            .replace("\"", "");
40
41        // remove after space anything that is not a hyperlink
42        let offset = url.find(" ").unwrap_or(url.len());
43        url.replace_range(offset.., "");
44
45        url
46    }
47
48    fn create_regex(&self) -> Regex {
49        let regex_string = "href='.+'|href=\".+\"";
50
51        Regex::new(regex_string).unwrap()
52    }
53}