Skip to main content

muffy_document/html/
parse.rs

1use super::document::Document;
2use html5ever::{parse_document, tendril::TendrilSink};
3use markup5ever_rcdom::RcDom;
4use std::io;
5
6/// Parses an HTML document.
7pub fn parse(source: &str) -> Result<Document, io::Error> {
8    parse_bytes(source.as_bytes())
9}
10
11/// Parses an HTML document from bytes.
12pub fn parse_bytes(mut source: &[u8]) -> Result<Document, io::Error> {
13    parse_document(RcDom::default(), Default::default())
14        .from_utf8()
15        .read_from(&mut source)
16        .map(|dom| Document::from_markup5ever(&dom.document))
17}
18
#[cfg(test)]
mod tests {
    use super::super::{element::Element, node::Node};
    use super::*;
    use alloc::sync::Arc;
    use pretty_assertions::assert_eq;

    /// Builds a shared element node with the given name, attributes, and children.
    fn element(
        name: &str,
        attributes: Vec<(String, String)>,
        children: Vec<Arc<Node>>,
    ) -> Arc<Node> {
        Arc::new(Node::Element(Element::new(
            name.to_string(),
            attributes,
            children,
        )))
    }

    /// Builds a shared text node.
    fn text(content: &str) -> Arc<Node> {
        Arc::new(Node::Text(content.to_string()))
    }

    /// Builds the document shape every test here expects: an `html` root
    /// holding an empty `head` followed by a `body` with the given children.
    fn page(body: Vec<Arc<Node>>) -> Document {
        Document::new(vec![element(
            "html",
            vec![],
            vec![element("head", vec![], vec![]), element("body", vec![], body)],
        )])
    }

    #[test]
    fn parse_empty_string() {
        let actual = parse("").unwrap();

        assert_eq!(actual, page(vec![]));
    }

    #[test]
    fn parse_simple_html() {
        let actual = parse("<html><body><p>Hello</p></body></html>").unwrap();

        assert_eq!(
            actual,
            page(vec![element("p", vec![], vec![text("Hello")])])
        );
    }

    #[test]
    fn parse_with_attributes() {
        let actual = parse("<html><body><p class=\"foo\">Hello</p></body></html>").unwrap();

        assert_eq!(
            actual,
            page(vec![element(
                "p",
                vec![("class".to_string(), "foo".to_string())],
                vec![text("Hello")],
            )])
        );
    }

    #[test]
    fn ignore_comments() {
        // The comment in the input must not appear in the expected tree.
        let actual = parse("<html><body><!-- comment --><p>Hello</p></body></html>").unwrap();

        assert_eq!(
            actual,
            page(vec![element("p", vec![], vec![text("Hello")])])
        );
    }
}