silkenweb_parse/
lib.rs

1//! # Parse HTML Fragments
2//!
3//! This module provides tools for parsing HTML fragments. An HTML fragment is a
4//! (possibly empty) list of HTML text or element nodes. For example:
5//!
6//! ```html
7//! <p>This is an HTML fragment</p>
8//! ```
9//!
//! - Parsing on the browser is done using `Element::innerHTML` on a
//!   `<template>` element. On the server it uses [`scraper`].
12//! - If any errors are present, a best effort is made to parse the HTML.
//! - Any empty or whitespace-only text nodes are removed.
14//! - Attributes are sorted to make the result more testable.
15extern crate proc_macro;
16
17use proc_macro::TokenStream;
18use quote::quote;
19use silkenweb::{
20    cfg_browser,
21    dom::Dom,
22    node::{
23        element::{Element, GenericElement, Namespace, ParentElement},
24        Node, Text,
25    },
26};
27
/// A platform independent representation of a parsed HTML node.
///
/// Both the server and browser parsers produce a tree of these, which is then
/// converted to either Silkenweb nodes or token streams.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DomNode {
    /// An element, along with its attributes and child nodes.
    Element {
        /// The namespace of the element, as reported by the parser.
        ns: String,
        /// The local name of the element (without any namespace prefix).
        name: String,
        /// `(name, value)` pairs, in the order the parser reported them.
        attributes: Vec<(String, String)>,
        /// The child nodes, in document order.
        children: Vec<Self>,
    },
    /// A text node, holding the text exactly as the parser reported it.
    Text(String),
}
37
38/// Convert an HTML fragment to Silkenweb nodes.
39///
40/// See the [module][`self`] documentation for details.
41///
42/// # Example
43///
44/// ```
45/// # use silkenweb_parse::html_to_nodes;
46/// # use silkenweb::node::Node;
47/// let html_fragment = "<p>Node 1</p><p>Node 2</p>";
48/// let nodes: Vec<Node> = html_to_nodes(html_fragment);
49///
50/// assert_eq!(format!("{}{}", nodes[0], nodes[1]), html_fragment);
51/// ```
52pub fn html_to_nodes<D: Dom>(html: &str) -> Vec<Node<D>> {
53    tree_to_nodes(arch::parse_html(html))
54}
55
56fn tree_to_nodes<D: Dom>(nodes: Vec<DomNode>) -> Vec<Node<D>> {
57    nodes
58        .into_iter()
59        .filter_map(|src_node| match src_node {
60            DomNode::Element {
61                ns,
62                name,
63                mut attributes,
64                children,
65            } => {
66                // Sort attributes for testability
67                attributes.sort();
68
69                let mut elem = GenericElement::new(&Namespace::Other(ns), &name);
70
71                for (name, value) in attributes {
72                    elem = elem.attribute(&name, value);
73                }
74
75                Some(elem.children(tree_to_nodes(children)).into())
76            }
77            DomNode::Text(text) => {
78                if text.trim().is_empty() {
79                    None
80                } else {
81                    Some(Text::new(&text).into())
82                }
83            }
84        })
85        .collect()
86}
87
88/// Convert an HTML fragment to Silkenweb node expressions.
89///
90/// This is for writing your own proc macros to parse HTML fragments at compile
91/// time. See the [module][`self`] documentation for details about the parsing.
92///
93/// The resulting [`TokenStream`]s are expressions with type
94/// [`Node`][`silkenweb::node::Node`]. The [`Dom`][`silkenweb::dom::Dom`] type
95/// is left unspecified, so may need to be specified if it can't be determined
96/// with type inference.
97pub fn html_to_tokens(dom_type: TokenStream, html: &str) -> Vec<TokenStream> {
98    tree_to_tokens(dom_type.into(), arch::parse_html(html))
99}
100
101fn tree_to_tokens(dom_type: proc_macro2::TokenStream, nodes: Vec<DomNode>) -> Vec<TokenStream> {
102    nodes
103        .into_iter()
104        .filter_map(|src_node| match src_node {
105            DomNode::Element {
106                ns,
107                name,
108                mut attributes,
109                children,
110            } => {
111                // Sort attributes for testability
112                attributes.sort();
113
114                let children =
115                    tree_to_tokens(dom_type.clone(), children).into_iter().map(proc_macro2::TokenStream::from);
116                let attributes = attributes.into_iter().map(|(name, value)| {
117                    quote! { elem = elem.attribute(#name, #value); }
118                });
119
120                let children = if children.len() == 0 {
121                    quote! {[] as [::silkenweb::node::Node<#dom_type>; 0]}
122                } else {
123                    quote! {[#(#children),*]}
124                };
125
126                Some(quote! {{
127                    use ::silkenweb::node::element::{Element, GenericElement, Namespace, ParentElement};
128                    let mut elem = GenericElement::<#dom_type>::new(
129                        &Namespace::Other(#ns.to_string()),
130                        #name
131                    );
132
133                    #(#attributes)*
134
135                    elem.children(#children)
136                }})
137            }
138            DomNode::Text(text) => {
139                if text.trim().is_empty() {
140                    None
141                } else {
142                    Some(quote!(::silkenweb::node::Text::<#dom_type>::new(#text)))
143                }
144            }
145        })
146        .map(|node| quote!(::silkenweb::node::Node::<#dom_type>::from(#node)).into())
147        .collect::<Vec<_>>()
148}
149
150#[cfg_browser(false)]
151mod arch {
152    use ego_tree::NodeRef;
153    use scraper::Html;
154
155    use crate::DomNode;
156
157    pub(super) fn parse_html(html: &str) -> Vec<DomNode> {
158        let fragment = Html::parse_fragment(html);
159        tree_to_nodes(&fragment.root_element())
160    }
161
162    fn tree_to_nodes(src_elem: &NodeRef<scraper::node::Node>) -> Vec<DomNode> {
163        src_elem
164            .children()
165            .filter_map(|src_node| {
166                if let Some(child) = src_node.value().as_element() {
167                    let ns = child.name.ns.to_string();
168                    let name = child.name.local.to_string();
169                    let attributes = child
170                        .attrs()
171                        .map(|(key, value)| (key.to_string(), value.to_string()))
172                        .collect();
173                    let children = tree_to_nodes(&src_node);
174
175                    Some(DomNode::Element {
176                        ns,
177                        name,
178                        attributes,
179                        children,
180                    })
181                } else {
182                    src_node
183                        .value()
184                        .as_text()
185                        .map(|text| DomNode::Text(text.to_string()))
186                }
187            })
188            .collect()
189    }
190}
191
192#[cfg_browser(true)]
193mod arch {
194    use silkenweb::{
195        dom::Wet,
196        node::element::{Element, GenericElement, Namespace},
197    };
198    use wasm_bindgen::JsCast;
199
200    use crate::DomNode;
201
202    pub(super) fn parse_html(html: &str) -> Vec<DomNode> {
203        let tmpl = GenericElement::<Wet>::new(&Namespace::Html, "template");
204        let tmpl_elem = tmpl
205            .handle()
206            .dom_element()
207            .dyn_into::<web_sys::HtmlTemplateElement>()
208            .unwrap();
209        tmpl_elem.set_inner_html(html);
210        first_child_to_nodes(tmpl_elem.content().first_child())
211    }
212
213    fn first_child_to_nodes(mut child: Option<web_sys::Node>) -> Vec<DomNode> {
214        let mut nodes = Vec::new();
215
216        while let Some(current) = child {
217            if let Some(src_elem) = current.dyn_ref::<web_sys::Element>() {
218                let ns = src_elem.namespace_uri().unwrap_or_default();
219                let name = src_elem.local_name();
220
221                let src_attributes = src_elem.attributes();
222                let mut attributes = Vec::new();
223
224                for item_index in 0.. {
225                    if let Some(attr) = src_attributes.item(item_index) {
226                        attributes.push((attr.name(), attr.value()));
227                    } else {
228                        break;
229                    }
230                }
231
232                let children = first_child_to_nodes(src_elem.first_child());
233
234                nodes.push(DomNode::Element {
235                    ns,
236                    name,
237                    attributes,
238                    children,
239                });
240            } else if let Some(text) = current.dyn_ref::<web_sys::Text>() {
241                nodes.push(DomNode::Text(
242                    text.text_content()
243                        .as_deref()
244                        .unwrap_or_default()
245                        .to_string(),
246                ));
247            }
248
249            child = current.next_sibling();
250        }
251
252        nodes
253    }
254}