html_filter/parse/mod.rs
//! Module that parses HTML source text (a `&str`) into an [`Html`] tree.
2
3mod tag;
4use core::str::Chars;
5
6use crate::prelude::Html;
7use crate::types::html_builder::HtmlBuilder;
8use crate::types::tag::TagBuilder;
9
/// Names of the tags that never carry any content.
///
/// Such tags are always considered self-closing: a bare `<meta>` or `<br>`
/// is already closed and is not followed by a matching closing tag.
const AUTO_CLOSING_TAGS: [&str; 2] = ["meta", "br"];
15
16impl Html {
17 /// Parses an HTML string into a Dom tree.
18 ///
19 /// # Errors
20 ///
21 /// This function returns an error when the input HTML's syntax is invalid.
22 ///
23 /// # Examples
24 ///
25 /// ```
26 /// use html_filter::prelude::*;
27 ///
28 /// let html: &str = r#"
29 /// <!DOCTYPE html>
30 /// <html lang="en">
31 /// <head>
32 /// <title>Html sample</title>
33 /// </head>
34 /// <body>
35 /// <p>This is an html sample.</p>
36 /// </body>
37 /// </html>
38 /// "#;
39 /// let tree: Html = Html::parse(html).expect("Invalid HTML");
40 /// assert!(format!("{tree}") == html);
41 /// ```
42 pub fn parse(html: &str) -> Result<Self, String> {
43 let mut tree = HtmlBuilder::default();
44 tree.parse(&mut html.chars()).map(|()| tree.into_html())
45 }
46}
47
48impl HtmlBuilder {
49 /// Wrapper for the [`Html::parse`] method.
50 ///
51 /// This method transforms a flow of chars into an Html tree.
52 fn parse(&mut self, chars: &mut Chars<'_>) -> Result<(), String> {
53 let mut dash_count: u32 = 0;
54 let mut style = false;
55 let mut script = false;
56 let mut comment = false;
57 while let Some(ch) = chars.next() {
58 if !comment && (style || script) {
59 if ch == '<'
60 && let Ok(TagBuilder::Close(name)) = TagBuilder::parse(chars)
61 {
62 if style && name == "style" {
63 style = false;
64 self.close_tag(&name)?;
65 continue;
66 }
67 if script && name == "script" {
68 script = false;
69 self.close_tag(&name)?;
70 continue;
71 }
72 }
73 self.push_char(ch);
74 } else if ch == '-' {
75 #[expect(clippy::arithmetic_side_effects, reason = "checked")]
76 if dash_count == 2 {
77 self.push_char('-');
78 } else {
79 dash_count += 1;
80 }
81 } else if ch == '>' && dash_count == 2 {
82 if !self.close_comment() {
83 return Err("Tried to close unopened comment.".to_owned());
84 }
85 comment = false;
86 dash_count = 0;
87 } else {
88 for _ in 0..dash_count {
89 self.push_char('-');
90 }
91 dash_count = 0;
92 if comment {
93 self.push_char(ch);
94 } else if ch == '<' {
95 match TagBuilder::parse(chars)? {
96 TagBuilder::Doctype { name, attr } =>
97 self.push_node(Self::Doctype { name, attr }),
98 TagBuilder::Open(tag) => {
99 if tag.as_name() == "style" {
100 style = true;
101 } else if tag.as_name() == "script" {
102 script = true;
103 }
104 self.push_tag(tag, false);
105 }
106 TagBuilder::OpenClose(tag) => self.push_tag(tag, true),
107 TagBuilder::Close(name) => self.close_tag(&name)?,
108 TagBuilder::OpenComment => {
109 self.push_comment();
110 comment = true;
111 }
112 }
113 } else {
114 self.push_char(ch);
115 }
116 }
117 }
118 Ok(())
119 }
120}