1use html5ever::ParseOpts;
4use html5ever::tokenizer::TokenizerOpts;
5use html5ever::tree_builder::TreeBuilderOpts;
6use std::borrow::Cow;
7use std::cell::{Cell, Ref, RefCell, RefMut};
8
9use blitz_dom::node::Attribute;
10use blitz_dom::{DocumentMutator, HtmlParserProvider};
11use html5ever::{
12 QualName,
13 tendril::{StrTendril, TendrilSink},
14 tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink},
15};
16
17fn html5ever_to_blitz_attr(attr: html5ever::Attribute) -> Attribute {
20 Attribute {
21 name: attr.name,
22 value: attr.value.to_string(),
23 }
24}
25
/// Stateless unit type that implements `HtmlParserProvider` by delegating to
/// [`DocumentHtmlParser`].
#[derive(Debug, Default, Clone, Copy)]
pub struct HtmlProvider;
28
29impl HtmlParserProvider for HtmlProvider {
30 fn parse_inner_html<'m2, 'doc2>(
31 &self,
32 mutr: &'m2 mut DocumentMutator<'doc2>,
33 element_id: usize,
34 html: &str,
35 ) {
36 DocumentHtmlParser::parse_inner_html_into_mutator(mutr, element_id, html);
37 }
38}
39
40pub struct DocumentHtmlParser<'m, 'doc> {
41 document_mutator: RefCell<&'m mut DocumentMutator<'doc>>,
42
43 pub errors: RefCell<Vec<Cow<'static, str>>>,
45
46 pub quirks_mode: Cell<QuirksMode>,
48 pub is_xml: bool,
49}
50
51impl<'m, 'doc> DocumentHtmlParser<'m, 'doc> {
52 #[track_caller]
53 fn mutr(&self) -> RefMut<'_, &'m mut DocumentMutator<'doc>> {
55 self.document_mutator.borrow_mut()
56 }
57}
58
59impl<'m, 'doc> DocumentHtmlParser<'m, 'doc> {
60 pub fn new(mutr: &'m mut DocumentMutator<'doc>) -> DocumentHtmlParser<'m, 'doc> {
61 DocumentHtmlParser {
62 document_mutator: RefCell::new(mutr),
63 errors: RefCell::new(Vec::new()),
64 quirks_mode: Cell::new(QuirksMode::NoQuirks),
65 is_xml: false,
66 }
67 }
68
69 pub fn parse_into_mutator<'a, 'd>(mutr: &'a mut DocumentMutator<'d>, html: &str) {
70 let mut sink = DocumentHtmlParser::new(mutr);
71
72 let is_xhtml_doc = html.starts_with("<?xml")
73 || html.starts_with("<!DOCTYPE") && {
74 let first_line = html.lines().next().unwrap();
75 first_line.contains("XHTML") || first_line.contains("xhtml")
76 };
77
78 if is_xhtml_doc {
79 sink.is_xml = true;
81 xml5ever::driver::parse_document(sink, Default::default())
82 .from_utf8()
83 .read_from(&mut html.as_bytes())
84 .unwrap();
85 } else {
86 sink.is_xml = false;
88 let opts = ParseOpts {
89 tokenizer: TokenizerOpts::default(),
90 tree_builder: TreeBuilderOpts {
91 exact_errors: false,
92 scripting_enabled: false, iframe_srcdoc: false,
94 drop_doctype: true,
95 quirks_mode: QuirksMode::NoQuirks,
96 },
97 };
98 html5ever::parse_document(sink, opts)
99 .from_utf8()
100 .read_from(&mut html.as_bytes())
101 .unwrap();
102 }
103 }
104
105 pub fn parse_inner_html_into_mutator<'a, 'd>(
106 mutr: &'a mut DocumentMutator<'d>,
107 element_id: usize,
108 html: &str,
109 ) {
110 let sink = DocumentHtmlParser::new(mutr);
111
112 let opts = ParseOpts {
113 tokenizer: TokenizerOpts::default(),
114 tree_builder: TreeBuilderOpts {
115 exact_errors: false,
116 scripting_enabled: false, iframe_srcdoc: false,
118 drop_doctype: true,
119 quirks_mode: QuirksMode::NoQuirks,
120 },
121 };
122 html5ever::driver::parse_fragment_for_element(sink, opts, element_id, false, None)
123 .from_utf8()
124 .read_from(&mut html.as_bytes())
125 .unwrap();
126
127 let fragment_root_id = mutr.last_child_id(0).unwrap();
130 let child_ids = mutr.child_ids(fragment_root_id);
131 mutr.append_children(element_id, &child_ids);
132 mutr.remove_node(fragment_root_id);
133 }
134}
135
136impl<'m, 'doc> TreeSink for DocumentHtmlParser<'m, 'doc> {
137 type Output = ();
138
139 type Handle = usize;
141
142 type ElemName<'a>
143 = Ref<'a, QualName>
144 where
145 Self: 'a;
146
147 fn finish(self) -> Self::Output {
148 for error in self.errors.borrow().iter() {
149 println!("ERROR: {error}");
150 }
151 }
152
153 fn parse_error(&self, msg: Cow<'static, str>) {
154 self.errors.borrow_mut().push(msg);
155 }
156
157 fn get_document(&self) -> Self::Handle {
158 0
159 }
160
161 fn elem_name<'a>(&'a self, target: &'a Self::Handle) -> Self::ElemName<'a> {
162 Ref::map(self.document_mutator.borrow(), |docm| {
163 docm.element_name(*target)
164 .expect("TreeSink::elem_name called on a node which is not an element!")
165 })
166 }
167
168 fn create_element(
169 &self,
170 name: QualName,
171 attrs: Vec<html5ever::Attribute>,
172 _flags: ElementFlags,
173 ) -> Self::Handle {
174 let attrs = attrs.into_iter().map(html5ever_to_blitz_attr).collect();
175 self.mutr().create_element(name, attrs)
176 }
177
178 fn create_comment(&self, _text: StrTendril) -> Self::Handle {
179 self.mutr().create_comment_node()
180 }
181
182 fn create_pi(&self, _target: StrTendril, _data: StrTendril) -> Self::Handle {
183 self.mutr().create_comment_node()
184 }
185
186 fn append(&self, parent_id: &Self::Handle, child: NodeOrText<Self::Handle>) {
187 match child {
188 NodeOrText::AppendNode(id) => self.mutr().append_children(*parent_id, &[id]),
189 NodeOrText::AppendText(text) => {
192 let last_child_id = self.mutr().last_child_id(*parent_id);
193 let has_appended = if let Some(id) = last_child_id {
194 self.mutr().append_text_to_node(id, &text).is_ok()
195 } else {
196 false
197 };
198 if !has_appended {
199 let new_child_id = self.mutr().create_text_node(&text);
200 self.mutr().append_children(*parent_id, &[new_child_id]);
201 }
202 }
203 }
204 }
205
206 fn append_before_sibling(&self, sibling_id: &Self::Handle, new_node: NodeOrText<Self::Handle>) {
209 match new_node {
210 NodeOrText::AppendNode(id) => self.mutr().insert_nodes_before(*sibling_id, &[id]),
211 NodeOrText::AppendText(text) => {
214 let previous_sibling_id = self.mutr().previous_sibling_id(*sibling_id);
215 let has_appended = if let Some(id) = previous_sibling_id {
216 self.mutr().append_text_to_node(id, &text).is_ok()
217 } else {
218 false
219 };
220 if !has_appended {
221 let new_child_id = self.mutr().create_text_node(&text);
222 self.mutr()
223 .insert_nodes_before(*sibling_id, &[new_child_id]);
224 }
225 }
226 };
227 }
228
229 fn append_based_on_parent_node(
230 &self,
231 element: &Self::Handle,
232 prev_element: &Self::Handle,
233 child: NodeOrText<Self::Handle>,
234 ) {
235 if self.mutr().node_has_parent(*element) {
236 self.append_before_sibling(element, child);
237 } else {
238 self.append(prev_element, child);
239 }
240 }
241
242 fn append_doctype_to_document(
243 &self,
244 _name: StrTendril,
245 _public_id: StrTendril,
246 _system_id: StrTendril,
247 ) {
248 }
250
251 fn get_template_contents(&self, target: &Self::Handle) -> Self::Handle {
252 *target
254 }
255
256 fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool {
257 x == y
258 }
259
260 fn set_quirks_mode(&self, mode: QuirksMode) {
261 self.quirks_mode.set(mode);
262 }
263
264 fn add_attrs_if_missing(&self, target: &Self::Handle, attrs: Vec<html5ever::Attribute>) {
265 let attrs = attrs.into_iter().map(html5ever_to_blitz_attr).collect();
266 self.mutr().add_attrs_if_missing(*target, attrs);
267 }
268
269 fn remove_from_parent(&self, target: &Self::Handle) {
270 self.mutr().remove_node(*target);
271 }
272
273 fn reparent_children(&self, old_parent_id: &Self::Handle, new_parent_id: &Self::Handle) {
274 self.mutr()
275 .reparent_children(*old_parent_id, *new_parent_id);
276 }
277}
278
/// Smoke test: parses a small HTML document into a fresh `BaseDocument` and
/// prints the resulting tree. It passes as long as nothing panics.
#[test]
fn parses_some_html() {
    use blitz_dom::{BaseDocument, DocumentConfig};

    let html = "<!DOCTYPE html><html><body><h1>hello world</h1></body></html>";
    let mut doc = BaseDocument::new(DocumentConfig::default());

    {
        // The mutator is dropped at the end of this scope, before the
        // document is inspected below.
        let mut mutr = doc.mutate();
        let sink = DocumentHtmlParser::new(&mut mutr);
        let mut input = html.as_bytes();

        html5ever::parse_document(sink, Default::default())
            .from_utf8()
            .read_from(&mut input)
            .unwrap();
    }

    doc.print_tree();
}