parsex/
lib.rs

1#![allow(warnings)]
2use regex::{Regex, RegexBuilder};
3use std::collections::HashMap;
4pub use self::stack::Stack;
5pub use self::token::Token;
6
7pub mod query;
8pub mod stack;
9pub mod token;
10pub mod token_iter;
11
12
13/// Parse block of HTML code into a token stack
14pub fn parse_html(html: &str) -> Stack {
15    // Start token stack
16    let mut stack = Stack::new(html);
17
18    // Extract comments
19    let re = RegexBuilder::new(r"(?s)<!--(.*?)-->")
20        .dot_matches_new_line(true)
21        .build()
22        .unwrap();
23    for cap in re.captures_iter(html) {
24        let tag_string = cap.get(0).unwrap().as_str();
25        stack.push("!", "", &true, tag_string);
26    }
27
28    // Go through tags
29    let re = Regex::new(r"<([\/]?)(.*?)([\/]?)>").unwrap();
30    for cap in re.captures_iter(html) {
31        // Set variables
32        let is_closing: bool = cap.get(1).unwrap().as_str() == "/";
33        let mut tag = cap.get(2).unwrap().as_str().trim();
34        let is_single: bool = cap.get(3).unwrap().as_str() == "/";
35        let tag_string = cap.get(0).unwrap().as_str();
36
37        // Skip if needed
38        if tag.starts_with('!') {
39            continue;
40        }
41
42        // Get attr string, if needed
43        let mut attr_string = "";
44        if let Some(cindex) = tag.find(' ') {
45            attr_string = tag[cindex + 1..].trim();
46            tag = &tag[..cindex];
47        }
48
49        // Process tag
50        if is_closing {
51            stack.close_tag(tag, tag_string);
52        } else {
53            stack.push(tag, attr_string, &is_single, tag_string);
54        }
55    }
56
57    stack
58}
59
60/// Parse string into hashmap of attributes
61pub fn parse_attr(attr_string: &str) -> (HashMap<String, String>, String) {
62    // Initialize
63    let mut attr = HashMap::new();
64    let mut attr_extra: String = attr_string.to_string();
65
66    // Pares attributes
67    let re = Regex::new(r#"([a-zA-Z0-9_\-]+?)=(.+?)(\"|\'|#)"#).unwrap();
68    for cap in re.captures_iter(attr_string) {
69        let key = cap.get(1).unwrap().as_str().trim();
70        let value = cap
71            .get(2)
72            .unwrap()
73            .as_str()
74            .trim_start_matches('\'')
75            .trim_start_matches('"')
76            .trim();
77        attr.insert(key.to_string(), value.to_string());
78        attr_extra = attr_extra.replace(cap.get(0).unwrap().as_str(), "");
79    }
80
81    (attr, attr_extra.trim().to_string())
82}