1use html5ever::{
4 local_name, namespace_url, ns, parse_document, parse_fragment,
5 tendril::TendrilSink, QualName,
6};
7use markup5ever_rcdom::{Handle, NodeData, RcDom};
8use once_cell::sync::Lazy;
9use sauron_core::{
10 html::{
11 attributes,
12 attributes::{
13 AttributeValue, Style, HTML_ATTRS, HTML_ATTRS_SPECIAL, HTML_STYLES,
14 },
15 html_element_self_closing,
16 tags::{
17 HTML_SC_TAGS, HTML_TAGS, HTML_TAGS_NON_COMMON,
18 HTML_TAGS_WITH_MACRO_NON_COMMON,
19 },
20 text,
21 },
22 mt_dom,
23 svg::{
24 attributes::{SVG_ATTRS, SVG_ATTRS_SPECIAL, SVG_ATTRS_XLINK},
25 tags::{SVG_TAGS, SVG_TAGS_NON_COMMON, SVG_TAGS_SPECIAL},
26 SVG_NAMESPACE,
27 },
28 Attribute, Node,
29};
30use std::collections::HashSet;
31use std::iter::FromIterator;
32use std::{fmt, io};
33use thiserror::Error;
34
35static ALL_SVG_TAGS: Lazy<HashSet<&&'static str>> = Lazy::new(|| {
36 HashSet::from_iter(
37 SVG_TAGS
38 .iter()
39 .chain(SVG_TAGS_NON_COMMON.iter())
40 .chain(SVG_TAGS_SPECIAL.iter().map(|(_func, t)| t)),
41 )
42});
43
44static ALL_HTML_TAGS: Lazy<HashSet<&&'static str>> = Lazy::new(|| {
58 HashSet::from_iter(
59 HTML_TAGS
60 .iter()
61 .chain(HTML_SC_TAGS.iter())
62 .chain(HTML_TAGS_NON_COMMON.iter())
63 .chain(HTML_TAGS_WITH_MACRO_NON_COMMON.iter()),
64 )
65});
66
67static SELF_CLOSING_TAGS: Lazy<HashSet<&&'static str>> =
68 Lazy::new(|| HashSet::from_iter(HTML_SC_TAGS.iter()));
69
70#[derive(Debug, Error)]
72pub enum ParseError {
73 #[error("Generic Error {0}")]
75 Generic(String),
76 #[error("{0}")]
78 IoError(#[from] io::Error),
79 #[error("{0}")]
81 FmtError(#[from] fmt::Error),
82}
83
84fn match_tag(tag: &str) -> Option<&'static str> {
85 ALL_HTML_TAGS
86 .iter()
87 .chain(ALL_SVG_TAGS.iter())
88 .find(|item| item.eq_ignore_ascii_case(&tag))
89 .map(|item| **item)
90}
91
92fn match_attribute(key: &str) -> Option<&'static str> {
93 HTML_ATTRS
94 .iter()
95 .chain(SVG_ATTRS.iter())
96 .find(|att| att.eq_ignore_ascii_case(&key))
97 .map(|att| *att)
98 .or_else(|| {
99 HTML_ATTRS_SPECIAL
100 .iter()
101 .chain(SVG_ATTRS_SPECIAL.iter())
102 .chain(SVG_ATTRS_XLINK.iter())
103 .find(|(_func, att)| att.eq_ignore_ascii_case(&key))
104 .map(|(func, _att)| *func)
105 })
106}
107
108fn match_style_name(key: &str) -> Option<&'static str> {
109 HTML_STYLES
110 .iter()
111 .find(|name| name.eq_ignore_ascii_case(&key))
112 .map(|name| *name)
113}
114
115pub fn match_attribute_function(key: &str) -> Option<&'static str> {
117 HTML_ATTRS
118 .iter()
119 .chain(SVG_ATTRS.iter())
120 .find(|att| att.eq_ignore_ascii_case(key))
121 .map(|att| *att)
122 .or_else(|| {
123 HTML_ATTRS_SPECIAL
124 .iter()
125 .chain(SVG_ATTRS_SPECIAL.iter())
126 .chain(SVG_ATTRS_XLINK.iter())
127 .find(|(func, _att)| func.eq_ignore_ascii_case(key))
128 .map(|(func, _att)| *func)
129 })
130}
131
132pub fn tag_namespace(tag: &str) -> Option<&'static str> {
146 let is_html = ALL_HTML_TAGS.contains(&tag);
147 let is_svg = ALL_SVG_TAGS.contains(&tag);
148 if !is_html {
149 if is_svg {
150 Some(SVG_NAMESPACE)
155 } else {
156 None
157 }
158 } else {
159 None
160 }
161}
162
163pub fn is_self_closing(tag: &str) -> bool {
165 SELF_CLOSING_TAGS.contains(&tag)
166}
167
168fn extract_attributes<MSG>(
169 attrs: &Vec<html5ever::Attribute>,
170) -> Vec<Attribute<MSG>> {
171 attrs
172 .iter()
173 .filter_map(|att| {
174 let key = att.name.local.to_string();
175 let value = att.value.to_string();
176 if key == "style" {
177 let styles = extract_styles(&value);
178 Some(mt_dom::attr("style", AttributeValue::from_styles(styles)))
179 } else if let Some(attr) = match_attribute(&key) {
180 Some(attributes::attr(attr, value))
181 } else {
182 log::warn!("Not a standard html attribute: {}", key);
183 None
184 }
185 })
186 .collect()
187}
188
189fn extract_styles(style: &str) -> Vec<Style> {
192 let mut extracted = vec![];
193 println!("processing style: {}", style);
194 let mut single_styles: Vec<&str> = style.split(";").collect();
195 single_styles.retain(|item| !item.trim().is_empty());
196 for single in single_styles {
197 let key_value: Vec<&str> = single.split(":").collect();
198 assert_eq!(key_value.len(), 2);
199 let key = key_value[0].trim();
200 let value = key_value[1].trim();
201 println!("style [{}] = [{}]", key, value);
202 if let Some(match_style) = match_style_name(key) {
203 extracted.push(Style::new(match_style, value.to_string().into()));
204 }
205 }
206 extracted
207}
208
209fn process_children<MSG>(node: &Handle) -> Vec<Node<MSG>> {
210 node.children
211 .borrow()
212 .iter()
213 .filter_map(|child_node| process_node(child_node))
214 .collect()
215}
216
217fn process_node<MSG>(node: &Handle) -> Option<Node<MSG>> {
218 match &node.data {
219 NodeData::Text { ref contents } => {
220 let text_content = contents.borrow().to_string();
221 if text_content.trim().is_empty() {
222 None
223 } else {
224 Some(text(text_content))
225 }
226 }
227
228 NodeData::Element {
229 ref name,
230 ref attrs,
231 ..
232 } => {
233 let tag = name.local.to_string();
234 if let Some(html_tag) = match_tag(&tag) {
235 let children_nodes = process_children(node);
236 let attributes = extract_attributes(&attrs.borrow());
237 let is_self_closing = HTML_SC_TAGS.contains(&html_tag);
238 Some(html_element_self_closing(
239 html_tag,
240 attributes,
241 children_nodes,
242 is_self_closing,
243 ))
244 } else {
245 log::warn!("Invalid tag: {}", tag);
246 None
247 }
248 }
249 NodeData::Document => {
250 let mut children_nodes = process_children(node);
251 let children_len = children_nodes.len();
252 if children_len == 1 {
253 Some(children_nodes.remove(0))
254 } else if children_len == 2 {
255 Some(children_nodes.remove(1))
256 } else {
257 None
258 }
259 }
260 _ => None,
261 }
262}
263
264pub fn parse<MSG>(html: &str) -> Result<Option<Node<MSG>>, ParseError> {
266 let html_start = html.trim_start();
267 let parser = if html_start.starts_with("<html")
268 || html_start.starts_with("<!DOCTYPE")
269 {
270 parse_document(RcDom::default(), Default::default())
271 } else {
272 parse_fragment(
273 RcDom::default(),
274 Default::default(),
275 QualName::new(None, ns!(html), local_name!("div")),
276 vec![],
277 )
278 };
279
280 let dom = parser.one(html);
281 let node = process_node(&dom.document);
282 Ok(node)
283}
284
285pub fn parse_simple<MSG>(html: &str) -> Result<Vec<Node<MSG>>, ParseError> {
287 if let Some(html) = parse(html)? {
288 if let Some(element) = html.take_element() {
289 assert_eq!(*element.tag(), "html");
290 Ok(element.take_children())
291 } else {
292 Ok(vec![])
293 }
294 } else {
295 Ok(vec![])
296 }
297}
298
#[cfg(test)]
mod tests {
    use super::*;
    use sauron_core::{html::div, Render};

    /// Smoke test: a small fragment parses and can be rendered once wrapped
    /// in a `div`. The output is only printed, not asserted.
    #[test]
    fn test_html_child() {
        let html = r#"<article class="side-to-side">
 <div>
 This is div content1
 </div>
 <footer>
 This is footer
 </footer>
</article>"#;
        let children: Vec<Node<()>> = parse_simple(html).expect("must parse");
        println!("node: {:#?}", children);
        let wrapper = div(vec![], children);
        println!("one: {}", wrapper.render_to_string());
    }

    /// Tags that are valid HTML tags get no namespace.
    #[test]
    fn tag_namespace_is_none_in_html_div() {
        let html_tags = ["div", "span", "a", "title", "style", "script"];
        for tag in html_tags.iter() {
            assert_eq!(None, tag_namespace(tag));
        }
    }

    /// Tags that exist only in SVG resolve to the SVG namespace.
    #[test]
    fn tag_namespace_in_svg_should_return_svg_namespace() {
        let svg_tags = ["svg", "rect", "line", "circle"];
        for tag in svg_tags.iter() {
            assert_eq!(Some(SVG_NAMESPACE), tag_namespace(tag));
        }
    }
}