muffy_document/html/
parse.rs1use super::document::Document;
2use html5ever::{parse_document, tendril::TendrilSink};
3use markup5ever_rcdom::RcDom;
4use std::io;
5
6pub fn parse(source: &str) -> Result<Document, io::Error> {
8 parse_bytes(source.as_bytes())
9}
10
11pub fn parse_bytes(mut source: &[u8]) -> Result<Document, io::Error> {
13 parse_document(RcDom::default(), Default::default())
14 .from_utf8()
15 .read_from(&mut source)
16 .map(|dom| Document::from_markup5ever(&dom.document))
17}
18
#[cfg(test)]
mod tests {
    use super::{
        super::{element::Element, node::Node},
        *,
    };
    use alloc::sync::Arc;
    use pretty_assertions::assert_eq;

    /// Builds a shared element node from a tag name, attributes, and children.
    fn element(
        name: &str,
        attributes: Vec<(String, String)>,
        children: Vec<Arc<Node>>,
    ) -> Arc<Node> {
        Arc::new(Node::Element(Element::new(
            name.to_string(),
            attributes,
            children,
        )))
    }

    /// Builds a shared text node.
    fn text(content: &str) -> Arc<Node> {
        Arc::new(Node::Text(content.to_string()))
    }

    /// Builds the expected document: an `html` element holding an empty `head`
    /// followed by a `body` with the given children.
    fn page(body_children: Vec<Arc<Node>>) -> Document {
        Document::new(vec![element(
            "html",
            vec![],
            vec![
                element("head", vec![], vec![]),
                element("body", vec![], body_children),
            ],
        )])
    }

    #[test]
    fn parse_empty_string() {
        // Even with no input, the expected tree has `html`, `head`, and `body`.
        assert_eq!(parse("").unwrap(), page(vec![]));
    }

    #[test]
    fn parse_simple_html() {
        assert_eq!(
            parse("<html><body><p>Hello</p></body></html>").unwrap(),
            page(vec![element("p", vec![], vec![text("Hello")])])
        );
    }

    #[test]
    fn parse_with_attributes() {
        assert_eq!(
            parse("<html><body><p class=\"foo\">Hello</p></body></html>").unwrap(),
            page(vec![element(
                "p",
                vec![("class".to_string(), "foo".to_string())],
                vec![text("Hello")],
            )])
        );
    }

    #[test]
    fn ignore_comments() {
        // The comment in the input has no counterpart in the expected tree.
        assert_eq!(
            parse("<html><body><!-- comment --><p>Hello</p></body></html>").unwrap(),
            page(vec![element("p", vec![], vec![text("Hello")])])
        );
    }
}