1#![allow(warnings)]
2use regex::{Regex, RegexBuilder};
3use std::collections::HashMap;
4pub use self::stack::Stack;
5pub use self::token::Token;
6
7pub mod query;
8pub mod stack;
9pub mod token;
10pub mod token_iter;
11
12
13pub fn parse_html(html: &str) -> Stack {
15 let mut stack = Stack::new(html);
17
18 let re = RegexBuilder::new(r"(?s)<!--(.*?)-->")
20 .dot_matches_new_line(true)
21 .build()
22 .unwrap();
23 for cap in re.captures_iter(html) {
24 let tag_string = cap.get(0).unwrap().as_str();
25 stack.push("!", "", &true, tag_string);
26 }
27
28 let re = Regex::new(r"<([\/]?)(.*?)([\/]?)>").unwrap();
30 for cap in re.captures_iter(html) {
31 let is_closing: bool = cap.get(1).unwrap().as_str() == "/";
33 let mut tag = cap.get(2).unwrap().as_str().trim();
34 let is_single: bool = cap.get(3).unwrap().as_str() == "/";
35 let tag_string = cap.get(0).unwrap().as_str();
36
37 if tag.starts_with('!') {
39 continue;
40 }
41
42 let mut attr_string = "";
44 if let Some(cindex) = tag.find(' ') {
45 attr_string = tag[cindex + 1..].trim();
46 tag = &tag[..cindex];
47 }
48
49 if is_closing {
51 stack.close_tag(tag, tag_string);
52 } else {
53 stack.push(tag, attr_string, &is_single, tag_string);
54 }
55 }
56
57 stack
58}
59
60pub fn parse_attr(attr_string: &str) -> (HashMap<String, String>, String) {
62 let mut attr = HashMap::new();
64 let mut attr_extra: String = attr_string.to_string();
65
66 let re = Regex::new(r#"([a-zA-Z0-9_\-]+?)=(.+?)(\"|\'|#)"#).unwrap();
68 for cap in re.captures_iter(attr_string) {
69 let key = cap.get(1).unwrap().as_str().trim();
70 let value = cap
71 .get(2)
72 .unwrap()
73 .as_str()
74 .trim_start_matches('\'')
75 .trim_start_matches('"')
76 .trim();
77 attr.insert(key.to_string(), value.to_string());
78 attr_extra = attr_extra.replace(cap.get(0).unwrap().as_str(), "");
79 }
80
81 (attr, attr_extra.trim().to_string())
82}