html_filter/parse/
mod.rs

//! Module that parses an HTML string into an [`Html`] tree.
2
3mod tag;
4use core::str::Chars;
5
6use crate::prelude::Html;
7use crate::types::html_builder::HtmlBuilder;
8use crate::types::tag::TagBuilder;
9
/// Tags that cannot have any content.
///
/// This means that they are always self-closing tags: `<meta>` and `<br>`
/// count as already closed.
const AUTO_CLOSING_TAGS: [&str; 2] = ["meta", "br"];
15
16impl Html {
17    /// Parses an HTML string into a Dom tree.
18    ///
19    /// # Errors
20    ///
21    /// This function returns an error when the input HTML's syntax is invalid.
22    ///
23    /// # Examples
24    ///
25    /// ```
26    /// use html_filter::prelude::*;
27    ///
28    /// let html: &str = r#"
29    /// <!DOCTYPE html>
30    /// <html lang="en">
31    ///     <head>
32    ///         <title>Html sample</title>
33    ///     </head>
34    ///     <body>
35    ///         <p>This is an html sample.</p>
36    ///     </body>
37    /// </html>
38    /// "#;
39    /// let tree: Html = Html::parse(html).expect("Invalid HTML");
40    /// assert!(format!("{tree}") == html);
41    /// ```
42    pub fn parse(html: &str) -> Result<Self, String> {
43        let mut tree = HtmlBuilder::default();
44        tree.parse(&mut html.chars()).map(|()| tree.into_html())
45    }
46}
47
48impl HtmlBuilder {
49    /// Wrapper for the [`Html::parse`] method.
50    ///
51    /// This method transforms a flow of chars into an Html tree.
52    fn parse(&mut self, chars: &mut Chars<'_>) -> Result<(), String> {
53        let mut dash_count: u32 = 0;
54        let mut style = false;
55        let mut script = false;
56        let mut comment = false;
57        while let Some(ch) = chars.next() {
58            if !comment && (style || script) {
59                if ch == '<'
60                    && let Ok(TagBuilder::Close(name)) = TagBuilder::parse(chars)
61                {
62                    if style && name == "style" {
63                        style = false;
64                        self.close_tag(&name)?;
65                        continue;
66                    }
67                    if script && name == "script" {
68                        script = false;
69                        self.close_tag(&name)?;
70                        continue;
71                    }
72                }
73                self.push_char(ch);
74            } else if ch == '-' {
75                #[expect(clippy::arithmetic_side_effects, reason = "checked")]
76                if dash_count == 2 {
77                    self.push_char('-');
78                } else {
79                    dash_count += 1;
80                }
81            } else if ch == '>' && dash_count == 2 {
82                if !self.close_comment() {
83                    return Err("Tried to close unopened comment.".to_owned());
84                }
85                comment = false;
86                dash_count = 0;
87            } else {
88                for _ in 0..dash_count {
89                    self.push_char('-');
90                }
91                dash_count = 0;
92                if comment {
93                    self.push_char(ch);
94                } else if ch == '<' {
95                    match TagBuilder::parse(chars)? {
96                        TagBuilder::Doctype { name, attr } =>
97                            self.push_node(Self::Doctype { name, attr }),
98                        TagBuilder::Open(tag) => {
99                            if tag.as_name() == "style" {
100                                style = true;
101                            } else if tag.as_name() == "script" {
102                                script = true;
103                            }
104                            self.push_tag(tag, false);
105                        }
106                        TagBuilder::OpenClose(tag) => self.push_tag(tag, true),
107                        TagBuilder::Close(name) => self.close_tag(&name)?,
108                        TagBuilder::OpenComment => {
109                            self.push_comment();
110                            comment = true;
111                        }
112                    }
113                } else {
114                    self.push_char(ch);
115                }
116            }
117        }
118        Ok(())
119    }
120}