1use html5ever::ParseOpts;
4use html5ever::tokenizer::TokenizerOpts;
5use html5ever::tree_builder::TreeBuilderOpts;
6use std::borrow::Cow;
7use std::cell::{Cell, Ref, RefCell, RefMut};
8
9use blitz_dom::node::Attribute;
10use blitz_dom::{DocumentMutator, HtmlParserProvider};
11use html5ever::{
12 QualName,
13 tendril::{StrTendril, TendrilSink},
14 tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink},
15};
16
17fn html5ever_to_blitz_attr(attr: html5ever::Attribute) -> Attribute {
20 Attribute {
21 name: attr.name,
22 value: attr.value.to_string(),
23 }
24}
25
/// Field-less handle whose [`HtmlParserProvider`] implementation forwards
/// inner-HTML parsing to [`DocumentHtmlParser`].
#[derive(Clone, Copy, Debug, Default)]
pub struct HtmlProvider;
28
29impl HtmlParserProvider for HtmlProvider {
30 fn parse_inner_html<'m2, 'doc2>(
31 &self,
32 mutr: &'m2 mut DocumentMutator<'doc2>,
33 element_id: usize,
34 html: &str,
35 ) {
36 DocumentHtmlParser::parse_inner_html_into_mutator(mutr, element_id, html);
37 }
38}
39
40pub struct DocumentHtmlParser<'m, 'doc> {
41 document_mutator: RefCell<&'m mut DocumentMutator<'doc>>,
42
43 pub errors: RefCell<Vec<Cow<'static, str>>>,
45
46 pub quirks_mode: Cell<QuirksMode>,
48 pub is_xml: bool,
49}
50
51impl<'m, 'doc> DocumentHtmlParser<'m, 'doc> {
52 #[track_caller]
53 fn mutr(&self) -> RefMut<'_, &'m mut DocumentMutator<'doc>> {
55 self.document_mutator.borrow_mut()
56 }
57}
58
59impl<'m, 'doc> DocumentHtmlParser<'m, 'doc> {
60 pub fn new(mutr: &'m mut DocumentMutator<'doc>) -> DocumentHtmlParser<'m, 'doc> {
61 DocumentHtmlParser {
62 document_mutator: RefCell::new(mutr),
63 errors: RefCell::new(Vec::new()),
64 quirks_mode: Cell::new(QuirksMode::NoQuirks),
65 is_xml: false,
66 }
67 }
68
69 pub fn parse_into_mutator<'a, 'd>(mutr: &'a mut DocumentMutator<'d>, html: &str) {
70 let mut sink = DocumentHtmlParser::new(mutr);
71
72 let is_xhtml_doc = html.starts_with("<?xml")
73 || html.starts_with("<!DOCTYPE") && {
74 let first_line = html.lines().next().unwrap();
75 first_line.contains("XHTML") || first_line.contains("xhtml")
76 };
77
78 if is_xhtml_doc {
79 sink.is_xml = true;
81 xml5ever::driver::parse_document(sink, Default::default())
82 .from_utf8()
83 .read_from(&mut html.as_bytes())
84 .unwrap();
85 } else {
86 sink.is_xml = false;
88 let opts = ParseOpts {
89 tokenizer: TokenizerOpts::default(),
90 tree_builder: TreeBuilderOpts {
91 exact_errors: false,
92 scripting_enabled: false, iframe_srcdoc: false,
94 drop_doctype: true,
95 quirks_mode: QuirksMode::NoQuirks,
96 },
97 };
98 html5ever::parse_document(sink, opts)
99 .from_utf8()
100 .read_from(&mut html.as_bytes())
101 .unwrap();
102 }
103 }
104
105 pub fn parse_inner_html_into_mutator<'a, 'd>(
106 mutr: &'a mut DocumentMutator<'d>,
107 element_id: usize,
108 html: &str,
109 ) {
110 let sink = DocumentHtmlParser::new(mutr);
111
112 let opts = ParseOpts {
113 tokenizer: TokenizerOpts::default(),
114 tree_builder: TreeBuilderOpts {
115 exact_errors: false,
116 scripting_enabled: false, iframe_srcdoc: false,
118 drop_doctype: true,
119 quirks_mode: QuirksMode::NoQuirks,
120 },
121 };
122 html5ever::driver::parse_fragment_for_element(sink, opts, element_id, false, None)
123 .from_utf8()
124 .read_from(&mut html.as_bytes())
125 .unwrap();
126
127 let fragment_root_id = mutr.last_child_id(0).unwrap();
130 let child_ids = mutr.child_ids(fragment_root_id);
131 mutr.append_children(element_id, &child_ids);
132 mutr.remove_node(fragment_root_id);
133 }
134}
135
136impl<'m, 'doc> TreeSink for DocumentHtmlParser<'m, 'doc> {
137 type Output = ();
138
139 type Handle = usize;
141
142 type ElemName<'a>
143 = Ref<'a, QualName>
144 where
145 Self: 'a;
146
147 fn finish(self) -> Self::Output {
148 #[cfg(feature = "tracing")]
149 for error in self.errors.borrow().iter() {
150 tracing::error!("{error}");
151 }
152 }
153
154 fn parse_error(&self, msg: Cow<'static, str>) {
155 self.errors.borrow_mut().push(msg);
156 }
157
158 fn get_document(&self) -> Self::Handle {
159 0
160 }
161
162 fn elem_name<'a>(&'a self, target: &'a Self::Handle) -> Self::ElemName<'a> {
163 Ref::map(self.document_mutator.borrow(), |docm| {
164 docm.element_name(*target)
165 .expect("TreeSink::elem_name called on a node which is not an element!")
166 })
167 }
168
169 fn create_element(
170 &self,
171 name: QualName,
172 attrs: Vec<html5ever::Attribute>,
173 _flags: ElementFlags,
174 ) -> Self::Handle {
175 let attrs = attrs.into_iter().map(html5ever_to_blitz_attr).collect();
176 self.mutr().create_element(name, attrs)
177 }
178
179 fn create_comment(&self, _text: StrTendril) -> Self::Handle {
180 self.mutr().create_comment_node()
181 }
182
183 fn create_pi(&self, _target: StrTendril, _data: StrTendril) -> Self::Handle {
184 self.mutr().create_comment_node()
185 }
186
187 fn append(&self, parent_id: &Self::Handle, child: NodeOrText<Self::Handle>) {
188 match child {
189 NodeOrText::AppendNode(id) => self.mutr().append_children(*parent_id, &[id]),
190 NodeOrText::AppendText(text) => {
193 let last_child_id = self.mutr().last_child_id(*parent_id);
194 let has_appended = if let Some(id) = last_child_id {
195 self.mutr().append_text_to_node(id, &text).is_ok()
196 } else {
197 false
198 };
199 if !has_appended {
200 let new_child_id = self.mutr().create_text_node(&text);
201 self.mutr().append_children(*parent_id, &[new_child_id]);
202 }
203 }
204 }
205 }
206
207 fn append_before_sibling(&self, sibling_id: &Self::Handle, new_node: NodeOrText<Self::Handle>) {
210 match new_node {
211 NodeOrText::AppendNode(id) => self.mutr().insert_nodes_before(*sibling_id, &[id]),
212 NodeOrText::AppendText(text) => {
215 let previous_sibling_id = self.mutr().previous_sibling_id(*sibling_id);
216 let has_appended = if let Some(id) = previous_sibling_id {
217 self.mutr().append_text_to_node(id, &text).is_ok()
218 } else {
219 false
220 };
221 if !has_appended {
222 let new_child_id = self.mutr().create_text_node(&text);
223 self.mutr()
224 .insert_nodes_before(*sibling_id, &[new_child_id]);
225 }
226 }
227 };
228 }
229
230 fn append_based_on_parent_node(
231 &self,
232 element: &Self::Handle,
233 prev_element: &Self::Handle,
234 child: NodeOrText<Self::Handle>,
235 ) {
236 if self.mutr().node_has_parent(*element) {
237 self.append_before_sibling(element, child);
238 } else {
239 self.append(prev_element, child);
240 }
241 }
242
243 fn append_doctype_to_document(
244 &self,
245 _name: StrTendril,
246 _public_id: StrTendril,
247 _system_id: StrTendril,
248 ) {
249 }
251
252 fn get_template_contents(&self, target: &Self::Handle) -> Self::Handle {
253 *target
255 }
256
257 fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool {
258 x == y
259 }
260
261 fn set_quirks_mode(&self, mode: QuirksMode) {
262 self.quirks_mode.set(mode);
263 }
264
265 fn add_attrs_if_missing(&self, target: &Self::Handle, attrs: Vec<html5ever::Attribute>) {
266 let attrs = attrs.into_iter().map(html5ever_to_blitz_attr).collect();
267 self.mutr().add_attrs_if_missing(*target, attrs);
268 }
269
270 fn remove_from_parent(&self, target: &Self::Handle) {
271 self.mutr().remove_node(*target);
272 }
273
274 fn reparent_children(&self, old_parent_id: &Self::Handle, new_parent_id: &Self::Handle) {
275 self.mutr()
276 .reparent_children(*old_parent_id, *new_parent_id);
277 }
278}
279
/// Smoke test: parses a small HTML document into a fresh `BaseDocument` and
/// prints the resulting tree. It passes as long as nothing panics.
#[test]
fn parses_some_html() {
    use blitz_dom::{BaseDocument, DocumentConfig};

    let html = "<!DOCTYPE html><html><body><h1>hello world</h1></body></html>";
    let mut doc = BaseDocument::new(DocumentConfig::default());

    {
        // The mutator is confined to this scope so that it is dropped before
        // the document is used again below.
        let mut mutr = doc.mutate();
        let sink = DocumentHtmlParser::new(&mut mutr);
        let mut input = html.as_bytes();

        html5ever::parse_document(sink, Default::default())
            .from_utf8()
            .read_from(&mut input)
            .unwrap();
    }

    doc.print_tree();
}