1use super::{
2 attributes::should_ignore,
3 document::Document,
4 node::{ElementData, Node, NodeData, NodeId},
5};
6use html5ever::{
7 expanded_name, local_name, namespace_url, ns,
8 tendril::{StrTendril, TendrilSink},
9 tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink},
10 Attribute, ExpandedName, QualName,
11};
12use std::borrow::Cow;
13
/// Selects how the input bytes are handed to the HTML parser.
#[derive(Clone, Copy, Debug)]
pub(crate) enum InliningMode {
    /// Parse the input with `html5ever::parse_document`.
    Document,
    /// Parse the input with `html5ever::parse_fragment`.
    Fragment,
}
21
22pub(crate) fn parse_with_options(
24 bytes: &[u8],
25 preallocate_node_capacity: usize,
26 mode: InliningMode,
27) -> Document {
28 let sink = Sink {
29 document: Document::with_capacity(preallocate_node_capacity),
30 };
31 let options = html5ever::ParseOpts::default();
32 match mode {
33 InliningMode::Document => html5ever::parse_document(sink, options)
34 .from_utf8()
35 .one(bytes),
36 InliningMode::Fragment => {
37 let mut document = html5ever::parse_fragment(
38 sink,
39 options,
40 QualName::new(None, ns!(html), local_name!("")),
41 vec![],
42 )
43 .from_utf8()
44 .one(bytes);
45 let document_id = NodeId::document_id();
46 let context_element_id = NodeId::new(
47 document_id
48 .get()
49 .checked_add(2)
52 .expect("Document id is too small to overflow"),
53 );
54 document.reparent_children(context_element_id, document_id);
55 document
56 }
57 }
58}
59
/// Tree sink handed to html5ever; parsed nodes are accumulated into `document`.
struct Sink {
    /// The document under construction; `TreeSink::finish` returns it as the parse output.
    document: Document,
}
66
67impl Sink {
68 fn push_node(&mut self, data: NodeData) -> NodeId {
70 self.document.push_node(data)
71 }
72
73 fn push_element(
74 &mut self,
75 name: QualName,
76 attributes: Vec<Attribute>,
77 inlining_ignored: bool,
78 ) -> NodeId {
79 let node_id = self.push_node(NodeData::Element {
80 element: ElementData::new(name, attributes),
81 inlining_ignored,
82 });
83 self.document.push_element_id(node_id);
84 node_id
85 }
86
87 fn push_text(&mut self, text: StrTendril) -> NodeId {
88 self.push_node(NodeData::Text { text })
89 }
90
91 fn push_comment(&mut self, text: StrTendril) -> NodeId {
92 self.push_node(NodeData::Comment { text })
93 }
94
95 fn push_processing_instruction(&mut self, target: StrTendril, data: StrTendril) -> NodeId {
96 self.push_node(NodeData::ProcessingInstruction { target, data })
97 }
98
99 fn push_doctype(&mut self, name: StrTendril) -> NodeId {
100 self.push_node(NodeData::Doctype { name })
101 }
102
103 fn append_impl<P, A>(&mut self, child: NodeOrText<NodeId>, previous: P, append: A)
105 where
106 P: FnOnce(&mut Document) -> Option<NodeId>,
107 A: FnOnce(&mut Document, NodeId),
108 {
109 let new_node = match child {
110 NodeOrText::AppendText(text) => {
111 if let Some(id) = previous(&mut self.document) {
114 if let Node {
115 data: NodeData::Text { text: existing },
116 ..
117 } = &mut self.document[id]
118 {
119 existing.push_tendril(&text);
120 return;
121 }
122 }
123 self.push_text(text)
124 }
125 NodeOrText::AppendNode(node) => node,
126 };
127
128 append(&mut self.document, new_node);
129 }
130}
131
132impl TreeSink for Sink {
133 type Handle = NodeId;
134 type Output = Document;
135
136 fn finish(self) -> Document {
137 self.document
138 }
139
140 fn parse_error(&mut self, _msg: Cow<'static, str>) {}
141
142 fn get_document(&mut self) -> NodeId {
143 NodeId::document_id()
144 }
145
146 fn elem_name<'a>(&'a self, &target: &'a NodeId) -> ExpandedName<'a> {
147 self.document[target]
148 .as_element()
149 .expect("Not an element")
151 .name
152 .expanded()
153 }
154
155 fn create_element(
156 &mut self,
157 name: QualName,
158 attrs: Vec<Attribute>,
159 _flags: ElementFlags,
160 ) -> NodeId {
161 let inlining_ignored = should_ignore(&attrs);
163
164 let (is_style, is_stylesheet) = {
166 if inlining_ignored {
168 (false, false)
169 } else if name.expanded() == expanded_name!(html "style") {
170 (true, false)
171 } else if name.expanded() == expanded_name!(html "link") {
172 let mut rel_stylesheet = false;
173 let mut href_non_empty = false;
174 for attr in &attrs {
175 if attr.name.local == local_name!("rel") && attr.value == "stylesheet".into() {
176 rel_stylesheet = true;
177 }
178 if attr.name.local == local_name!("href") && !attr.value.is_empty() {
180 href_non_empty = true;
181 }
182 if rel_stylesheet && href_non_empty {
183 break;
184 }
185 }
186 (false, rel_stylesheet && href_non_empty)
187 } else {
188 (false, false)
189 }
190 };
191 let element = self.push_element(name, attrs, inlining_ignored);
192 if is_style {
194 self.document.add_style(element);
195 }
196 if is_stylesheet {
197 self.document.add_linked_stylesheet(element);
198 }
199 element
200 }
201
202 fn create_comment(&mut self, text: StrTendril) -> NodeId {
203 self.push_comment(text)
204 }
205
206 fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> NodeId {
207 self.push_processing_instruction(target, data)
208 }
209
210 fn append(&mut self, &parent: &NodeId, child: NodeOrText<NodeId>) {
212 self.append_impl(
213 child,
214 |document| document[parent].last_child,
215 |document, new_node| document.append(parent, new_node),
216 );
217 }
218
219 fn append_based_on_parent_node(
220 &mut self,
221 element: &NodeId,
222 prev_element: &NodeId,
223 child: NodeOrText<NodeId>,
224 ) {
225 if self.document[*element].parent.is_some() {
226 self.append_before_sibling(element, child);
227 } else {
228 self.append(prev_element, child);
229 }
230 }
231
232 fn append_doctype_to_document(
234 &mut self,
235 name: StrTendril,
236 _public_id: StrTendril,
237 _system_id: StrTendril,
238 ) {
239 let node = self.push_doctype(name);
240 self.document.append(NodeId::document_id(), node);
241 }
242
243 fn get_template_contents(&mut self, &target: &NodeId) -> NodeId {
244 target
245 }
246
247 fn same_node(&self, x: &NodeId, y: &NodeId) -> bool {
249 x == y
250 }
251
252 fn set_quirks_mode(&mut self, _mode: QuirksMode) {}
253
254 fn append_before_sibling(&mut self, &sibling: &NodeId, child: NodeOrText<NodeId>) {
256 self.append_impl(
257 child,
258 |document| document[sibling].previous_sibling,
259 |document, node| document.insert_before(sibling, node),
260 );
261 }
262
263 fn add_attrs_if_missing(&mut self, &target: &NodeId, attrs: Vec<Attribute>) {
265 let element = self.document[target]
266 .as_element_mut()
267 .expect("not an element");
268 let attributes = &mut element.attributes;
269 for attr in attrs {
270 if attributes
271 .attributes
272 .iter()
273 .any(|entry| entry.name == attr.name)
274 {
275 attributes.attributes.push(attr);
276 }
277 }
278 }
279
280 fn remove_from_parent(&mut self, &target: &NodeId) {
282 self.document.detach(target);
283 }
284
285 fn reparent_children(&mut self, node: &NodeId, new_parent: &NodeId) {
287 self.document.reparent_children(*node, *new_parent);
288 }
289}