// sws_scraper/html/tree_sink.rs
use std::borrow::Cow;
2
3use html5ever::tendril::StrTendril;
4use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
5use html5ever::Attribute;
6use html5ever::{ExpandedName, QualName};
7use sws_tree::NodeId;
8
9use super::Html;
10use crate::node::{Comment, Doctype, Element, Node, ProcessingInstruction, Text};
11
12impl TreeSink for Html {
14 type Output = Self;
15 type Handle = NodeId;
16
17 fn finish(self) -> Self {
18 self
19 }
20
21 fn parse_error(&mut self, msg: Cow<'static, str>) {
23 self.errors.push(msg);
24 }
25
26 fn set_quirks_mode(&mut self, mode: QuirksMode) {
28 self.quirks_mode = mode;
29 }
30
31 fn get_document(&mut self) -> Self::Handle {
33 self.tree.root().id()
34 }
35
36 fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool {
38 x == y
39 }
40
41 fn elem_name(&self, target: &Self::Handle) -> ExpandedName {
45 self.tree
46 .get(*target)
47 .unwrap()
48 .map_value(|v| unsafe { std::mem::transmute(v.as_element().unwrap().name.expanded()) })
49 .unwrap()
50 }
51
52 fn create_element(
58 &mut self,
59 name: QualName,
60 attrs: Vec<Attribute>,
61 _flags: ElementFlags,
62 ) -> Self::Handle {
63 let fragment = name.expanded() == expanded_name!(html "template");
64
65 let node_id = self
66 .tree
67 .orphan(Node::Element(Element::new(name.clone(), attrs)));
68
69 if fragment {
70 self.tree.get(node_id).unwrap().append(Node::Fragment);
71 }
72
73 node_id
74 }
75
76 fn create_comment(&mut self, text: StrTendril) -> Self::Handle {
78 self.tree.orphan(Node::Comment(Comment { comment: text }))
79 }
80
81 fn append_doctype_to_document(
83 &mut self,
84 name: StrTendril,
85 public_id: StrTendril,
86 system_id: StrTendril,
87 ) {
88 let doctype = Doctype {
89 name,
90 public_id,
91 system_id,
92 };
93 self.tree.root().append(Node::Doctype(doctype));
94 }
95
96 fn append(&mut self, parent: &Self::Handle, child: NodeOrText<Self::Handle>) {
101 let mut parent = self.tree.get(*parent).unwrap();
102
103 match child {
104 NodeOrText::AppendNode(id) => {
105 parent.append_id(id);
106 }
107
108 NodeOrText::AppendText(text) => {
109 let can_concat = parent
110 .last_child()
111 .map_or(false, |n| n.map_value(|v| v.is_text()).unwrap_or(false));
112
113 if can_concat {
114 let last_child = parent.last_child().unwrap();
115 last_child.update_value(|v| match v {
116 Node::Text(ref mut t) => t.text.push_tendril(&text),
117 _ => unreachable!(),
118 });
119 } else {
120 parent.append(Node::Text(Text { text }));
121 }
122 }
123 }
124 }
125
126 fn append_before_sibling(
135 &mut self,
136 sibling: &Self::Handle,
137 new_node: NodeOrText<Self::Handle>,
138 ) {
139 if let NodeOrText::AppendNode(id) = new_node {
140 self.tree.get(id).unwrap().detach();
141 }
142
143 let mut sibling = self.tree.get(*sibling).unwrap();
144 if sibling.parent().is_some() {
145 match new_node {
146 NodeOrText::AppendNode(id) => {
147 sibling.insert_id_before(id);
148 }
149
150 NodeOrText::AppendText(text) => {
151 let can_concat = sibling
152 .prev_sibling()
153 .map_or(false, |n| n.map_value(|v| v.is_text()).unwrap_or(false));
154
155 if can_concat {
156 let prev_sibling = sibling.prev_sibling().unwrap();
157 prev_sibling.update_value(|v| match v {
158 Node::Text(ref mut t) => t.text.push_tendril(&text),
159 _ => unreachable!(),
160 });
161 } else {
162 sibling.insert_before(Node::Text(Text { text }));
163 }
164 }
165 }
166 }
167 }
168
169 fn remove_from_parent(&mut self, target: &Self::Handle) {
171 self.tree.get(*target).unwrap().detach();
172 }
173
174 fn reparent_children(&mut self, node: &Self::Handle, new_parent: &Self::Handle) {
176 self.tree
177 .get(*new_parent)
178 .unwrap()
179 .reparent_from_id_append(*node);
180 }
181
182 fn add_attrs_if_missing(&mut self, target: &Self::Handle, attrs: Vec<Attribute>) {
185 let node = self.tree.get(*target).unwrap();
186 node.update_value(|v| match v {
187 Node::Element(ref mut element) => {
188 for attr in attrs {
189 element.attrs.entry(attr.name).or_insert(attr.value);
190 }
191 }
192 _ => unreachable!(),
193 });
194 }
195
196 fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle {
201 self.tree.get(*target).unwrap().first_child().unwrap().id()
202 }
203
204 fn mark_script_already_started(&mut self, _node: &Self::Handle) {}
206
207 fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Self::Handle {
209 self.tree
210 .orphan(Node::ProcessingInstruction(ProcessingInstruction {
211 target,
212 data,
213 }))
214 }
215
216 fn append_based_on_parent_node(
217 &mut self,
218 element: &Self::Handle,
219 prev_element: &Self::Handle,
220 child: NodeOrText<Self::Handle>,
221 ) {
222 if self.tree.get(*element).unwrap().parent().is_some() {
223 self.append_before_sibling(element, child)
224 } else {
225 self.append(prev_element, child)
226 }
227 }
228}