sauron_html_parser/
lib.rs1#![deny(warnings)]
2
3use std::{fmt, io, ops::Deref};
4
5use rphtml::{
6 config::ParseOptions,
7 parser::{Doc, NodeType},
8 types::BoxDynError,
9};
10
11use sauron_core::{
12 html::{attributes::*, lookup, *},
13 vdom::{AttributeValue, Node, Value},
14};
15
/// Errors that can be reported while turning an HTML string into a node tree.
///
/// Every wrapping variant gets a `From` conversion (via `#[from]`), so the
/// wrapped error types can be propagated with `?`.
#[derive(Debug, thiserror::Error)]
pub enum ParseError {
    /// Wraps an [`io::Error`]; displayed as the inner error's own message.
    // NOTE(review): not constructed anywhere in the visible part of this file.
    #[error("{0}")]
    IoError(#[from] io::Error),
    /// Wraps a [`fmt::Error`]; displayed as the inner error's own message.
    // NOTE(review): not constructed anywhere in the visible part of this file.
    #[error("{0}")]
    FmtError(#[from] fmt::Error),
    /// Wraps the boxed error type used by `rphtml`; displayed as the inner
    /// error's own message.
    #[error("{0}")]
    RpHtmlError(#[from] BoxDynError),
    /// A tag name was not recognised; the payload is the offending tag name.
    #[error("Invalid tag: {0}")]
    InvalidTag(String),
}
32
33pub fn raw_html<MSG>(html: &str) -> Node<MSG> {
36 let html = html_escape::decode_html_entities(html);
38 parse_html(&html)
39 .expect("must be ok")
40 .expect("must have a node")
41}
42
43pub fn parse_html<MSG>(html: &str) -> Result<Option<Node<MSG>>, ParseError> {
46 let doc = Doc::parse(
47 html,
48 ParseOptions {
49 case_sensitive_tagname: false,
50 allow_self_closing: true,
51 auto_fix_unclosed_tag: true,
52 auto_fix_unexpected_endtag: true,
53 auto_fix_unescaped_lt: true,
54 },
55 )?;
56 process_node(doc.get_root_node().borrow().deref())
57}
58
59fn process_node<MSG>(node: &rphtml::parser::Node) -> Result<Option<Node<MSG>>, ParseError> {
63 let content = if let Some(content) = &node.content {
64 let content = String::from_iter(content.iter());
65 Some(content)
66 } else {
67 None
68 };
69
70 let mut child_nodes = if let Some(childs) = &node.childs {
71 childs
72 .iter()
73 .flat_map(|child| process_node(child.borrow().deref()).ok().flatten())
74 .collect()
75 } else {
76 vec![]
77 };
78
79 match node.node_type {
80 NodeType::Tag => {
81 let tag = &node.meta.as_ref().expect("must have a tag");
82 let tag_name = String::from_iter(tag.borrow().name.iter());
83 if let Some(html_tag) = lookup::match_tag(&tag_name) {
84 let is_self_closing = HTML_SC_TAGS.contains(&html_tag);
85 let attributes: Vec<Attribute<MSG>> = tag
86 .borrow()
87 .attrs
88 .iter()
89 .filter_map(|attr| {
90 attr.key.as_ref().and_then(|key| {
91 let key = String::from_iter(key.content.iter());
92 if let Some(attr_key) = lookup::match_attribute(&key) {
93 let value = if let Some(value) = &attr.value {
94 let value = String::from_iter(value.content.iter());
95 AttributeValue::Simple(Value::from(value))
96 } else {
97 AttributeValue::Empty
98 };
99 Some(Attribute::new(None, attr_key, value))
100 } else {
101 log::warn!("Not a standard html attribute: {}", key);
102 None
103 }
104 })
105 })
106 .collect();
107
108 Ok(Some(html_element(
109 None,
110 html_tag,
111 attributes,
112 child_nodes,
113 is_self_closing,
114 )))
115 } else {
116 log::error!("invalid tag: {}", tag_name);
117 Err(ParseError::InvalidTag(tag_name))
118 }
119 }
120 NodeType::Text => {
121 let content = content.expect("must have a content");
122 Ok(Some(text(content)))
123 }
124 NodeType::AbstractRoot => {
125 let child_nodes_len = child_nodes.len();
126 match child_nodes_len {
127 0 => Ok(Some(node_list([]))),
128 1 => Ok(Some(child_nodes.remove(0))),
129 _ => Ok(Some(node_list(child_nodes))),
130 }
131 }
132 _ => Ok(None),
133 }
134}