scraper/html/
tree_sink.rs1use super::Html;
2use crate::node::{Doctype, Element, Node, ProcessingInstruction, Text};
3use ego_tree::NodeId;
4use fast_html5ever::tendril::StrTendril;
5use fast_html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
6use fast_html5ever::Attribute;
7use fast_html5ever::{ExpandedName, QualName};
8use std::borrow::Cow;
9
10impl TreeSink for Html {
12 type Output = Self;
13 type Handle = NodeId;
14
15 fn finish(self) -> Self {
16 self
17 }
18
19 fn parse_error(&mut self, _: Cow<'static, str>) {}
21
22 fn set_quirks_mode(&mut self, mode: QuirksMode) {
24 self.quirks_mode = mode;
25 }
26
27 fn get_document(&mut self) -> Self::Handle {
29 self.tree.root().id()
30 }
31
32 fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool {
34 x == y
35 }
36
37 fn elem_name(&self, target: &Self::Handle) -> ExpandedName {
41 self.tree
42 .get(*target)
43 .unwrap()
44 .value()
45 .as_element()
46 .unwrap()
47 .name
48 .expanded()
49 }
50
51 fn create_element(
57 &mut self,
58 name: QualName,
59 attrs: Vec<Attribute>,
60 _flags: ElementFlags,
61 ) -> Self::Handle {
62 let mut node = self
63 .tree
64 .orphan(Node::Element(Element::new(name.clone(), attrs)));
65 if name.expanded() == expanded_name!(html "template") {
66 node.append(Node::Fragment);
67 }
68 node.id()
69 }
70
71 fn create_comment(&mut self, _: StrTendril) {}
73
74 fn append_doctype_to_document(
76 &mut self,
77 name: StrTendril,
78 public_id: StrTendril,
79 system_id: StrTendril,
80 ) {
81 let doctype = Doctype {
82 name: name.into_send().into(),
83 public_id: public_id.into_send().into(),
84 system_id: system_id.into_send().into(),
85 };
86 self.tree.root_mut().append(Node::Doctype(doctype));
87 }
88
89 fn append(&mut self, parent: &Self::Handle, child: NodeOrText<Self::Handle>) {
94 let mut parent = self.tree.get_mut(*parent).unwrap();
95
96 match child {
97 NodeOrText::AppendNode(id) => {
98 parent.append_id(id);
99 }
100
101 NodeOrText::AppendText(text) => {
102 let can_concat = parent
103 .last_child()
104 .map_or(false, |mut n| n.value().is_text());
105
106 let text = text.into_send().into();
107
108 if can_concat {
109 let mut last_child = parent.last_child().unwrap();
110 match *last_child.value() {
111 Node::Text(ref mut t) => t.text.push_tendril(&text),
112 _ => unreachable!(),
113 }
114 } else {
115 parent.append(Node::Text(Text { text }));
116 }
117 }
118 }
119 }
120
121 fn append_before_sibling(
130 &mut self,
131 sibling: &Self::Handle,
132 new_node: NodeOrText<Self::Handle>,
133 ) {
134 if let NodeOrText::AppendNode(id) = new_node {
135 self.tree.get_mut(id).unwrap().detach();
136 }
137
138 let mut sibling = self.tree.get_mut(*sibling).unwrap();
139 if sibling.parent().is_some() {
140 match new_node {
141 NodeOrText::AppendNode(id) => {
142 sibling.insert_id_before(id);
143 }
144
145 NodeOrText::AppendText(text) => {
146 let text = text.into_send().into();
147 let can_concat = sibling
148 .prev_sibling()
149 .map_or(false, |mut n| n.value().is_text());
150
151 if can_concat {
152 let mut prev_sibling = sibling.prev_sibling().unwrap();
153 match *prev_sibling.value() {
154 Node::Text(ref mut t) => t.text.push_tendril(&text),
155 _ => unreachable!(),
156 }
157 } else {
158 sibling.insert_before(Node::Text(Text { text }));
159 }
160 }
161 }
162 }
163 }
164
165 fn remove_from_parent(&mut self, target: &Self::Handle) {
167 if let Some(mut p) = self.tree.get_mut(*target) {
168 p.detach();
169 }
170 }
171
172 fn reparent_children(&mut self, node: &Self::Handle, new_parent: &Self::Handle) {
174 if let Some(mut p) = self.tree.get_mut(*new_parent) {
175 p.reparent_from_id_append(*node);
176 }
177 }
178
179 fn add_attrs_if_missing(&mut self, target: &Self::Handle, attrs: Vec<Attribute>) {
182 let mut node = self.tree.get_mut(*target).unwrap();
183 let element = match *node.value() {
184 Node::Element(ref mut e) => e,
185 _ => unreachable!(),
186 };
187
188 for attr in attrs {
189 element
190 .attrs
191 .entry(attr.name)
192 .or_insert(attr.value.into_send().into());
193 }
194 }
195
196 fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle {
201 self.tree.get(*target).unwrap().first_child().unwrap().id()
202 }
203
204 fn mark_script_already_started(&mut self, _node: &Self::Handle) {}
206
207 fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Self::Handle {
209 self.tree
210 .orphan(Node::ProcessingInstruction(ProcessingInstruction {
211 target: target.into_send().into(),
212 data: data.into_send().into(),
213 }))
214 .id()
215 }
216
217 fn append_based_on_parent_node(
218 &mut self,
219 element: &Self::Handle,
220 prev_element: &Self::Handle,
221 child: NodeOrText<Self::Handle>,
222 ) {
223 if self.tree.get(*element).unwrap().parent().is_some() {
224 self.append_before_sibling(element, child)
225 } else {
226 self.append(prev_element, child)
227 }
228 }
229}