easy_regex/helpers/
html_pattern.rs

//! Helper functions for building regular expressions that match HTML elements.
2
3use std::borrow::BorrowMut;
4
/// A recursive description of a chain of nested HTML elements.
///
/// Each value describes one element and, optionally, the single element nested
/// inside it. The element whose `child_html` is `None` is the innermost one.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HtmlPattern<'a> {
    /// Tag name (for example `"div"`), used for both the opening and the closing tag.
    pub tag_name: &'a str,
    /// Attribute text that must appear inside the opening tag (for example
    /// `"id=\"86\""`), or `None` when any attributes are acceptable.
    pub essential_attribute: Option<&'a str>,
    /// The element nested inside this one; `None` marks the innermost element.
    pub child_html: Box<Option<HtmlPattern<'a>>>,
}
11
12/// Creates regular expressions for HTML elements recursively.
13/// 
14/// This methods takes an HTML element which could have nested elements and outputs a regular expression
15/// that captures the innerHTML. If the element has nested elements, it will caputure the deepest child element's innerHTML.
16///
17/// # Examples
18///
19/// ```
20/// use easy_regex::helpers::html_pattern::{HtmlPattern, create_pattern};
21///
22/// let pattern_str = HtmlPattern {
23///     tag_name: "div",
24///     essential_attribute: Some("id=\"86\""),
25///     child_html: Box::new(Some(HtmlPattern {
26///         tag_name: "div",
27///         essential_attribute: Some("class=\"some-class\""),
28///         child_html: Box::new(None),
29///     })
30/// )};
31/// 
32/// let result = create_pattern(pattern_str);
33/// assert_eq!(
34/// "<div[^<>]*id=\"86\"[^<>]*>.*<div[^<>]*class=\"some-class\"[^<>]*>(.*)</div>.*</div>",
35/// result
36/// );
37/// ```
38pub fn create_pattern(pattern: HtmlPattern) -> String {
39    let mut html_pattern = format!("<{}[^<>]*", pattern.tag_name);
40
41    if let Some(ess_attr) = pattern.essential_attribute {
42        let attr = format!("{}[^<>]*>.*", ess_attr);
43        html_pattern.push_str(&attr);
44    } else {
45        html_pattern.push_str(">.*");
46    }
47
48    let some_ref = &mut String::new();
49    some_ref.push_str(&html_pattern);
50    recursive_pattern(some_ref.borrow_mut(), pattern.child_html.unwrap());
51    let end = format!(".*</{}>", pattern.tag_name);
52    some_ref.push_str(&end);
53
54    some_ref.to_string()
55}
56
57fn recursive_pattern(some_ref: &mut String, pattern: HtmlPattern) -> String {
58    let mut html_pattern = format!("<{}[^<>]*", pattern.tag_name);
59
60    if let Some(ess_attr) = pattern.essential_attribute {
61        let attr = format!("{}[^<>]*>", ess_attr);
62        html_pattern.push_str(&attr);
63    } else {
64        html_pattern.push_str(">");
65    }
66
67    some_ref.push_str(&html_pattern);
68
69    if pattern.child_html.is_some() {
70        let child = pattern.child_html.unwrap();
71        recursive_pattern(some_ref.borrow_mut(), child);
72        let end = format!(".*</{}>", pattern.tag_name);
73        some_ref.push_str(&end);
74    } else {
75        let tail_pattern = format!("(.*)</{}>", pattern.tag_name);
76        some_ref.push_str(&tail_pattern);
77    }
78    some_ref.to_string()
79}
80
#[cfg(test)]
mod tests {
    use super::{create_pattern, HtmlPattern};

    /// Builds a single element of the pattern chain, boxing the optional child.
    fn element<'a>(
        tag_name: &'a str,
        essential_attribute: Option<&'a str>,
        child: Option<HtmlPattern<'a>>,
    ) -> HtmlPattern<'a> {
        HtmlPattern {
            tag_name,
            essential_attribute,
            child_html: Box::new(child),
        }
    }

    /// A root `div` with one nested `div`: the capture group lands in the nested element.
    #[test]
    fn one_level_nested_elements() {
        let inner = element("div", Some("class=\"some-class\""), None);
        let outer = element("div", Some("id=\"86\""), Some(inner));

        let expected =
            "<div[^<>]*id=\"86\"[^<>]*>.*<div[^<>]*class=\"some-class\"[^<>]*>(.*)</div>.*</div>";
        let result = create_pattern(outer);

        assert_eq!(expected, result);
    }

    /// A root `div` > `div` > `p`: the capture group lands in the innermost `p`.
    #[test]
    fn two_level_nested_elements() {
        let paragraph = element("p", None, None);
        let middle = element("div", Some("class=\"some-class\""), Some(paragraph));
        let outer = element("div", Some("id=\"86\""), Some(middle));

        let expected = "<div[^<>]*id=\"86\"[^<>]*>.*<div[^<>]*class=\"some-class\"[^<>]*><p[^<>]*>(.*)</p>.*</div>.*</div>";
        let result = create_pattern(outer);

        assert_eq!(expected, result);
    }
}