1mod html;
4
5use crate::style::{StyleSet, StyleSetBuilder};
6use accessibility_scraper::selector::CssLocalName;
7use fast_html5ever::{LocalName, QualName};
8use std::borrow::Cow;
9use std::fmt;
10use std::iter::successors;
11
12pub struct Document {
13 pub nodes: Vec<Node>,
14 pub style_elements: Vec<NodeId>,
15 pub style_set: Option<StyleSet>,
16}
17
18pub struct Node {
19 pub parent: Option<NodeId>,
20 pub next_sibling: Option<NodeId>,
21 pub previous_sibling: Option<NodeId>,
22 pub first_child: Option<NodeId>,
23 pub last_child: Option<NodeId>,
24 pub data: NodeData,
25 pub node_id: Option<NodeId>,
26}
27
/// Index of a node inside `Document::nodes`.
///
/// The wrapped value is non-zero, so `Option<NodeId>` needs no extra space
/// compared to a bare `usize`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct NodeId(std::num::NonZeroUsize);
30
31impl Document {
32 fn new() -> Self {
33 let dummy = Node::new(NodeData::Document);
35 let document_node = Node::new(NodeData::Document);
36
37 Document {
38 nodes: vec![dummy, document_node],
39 style_elements: Vec::new(),
40 style_set: None,
41 }
42 }
43
44 pub fn document_node_id() -> NodeId {
45 NodeId(std::num::NonZeroUsize::new(1).unwrap())
46 }
47
48 pub fn parse_stylesheets(&self) -> StyleSet {
49 let mut style_set = StyleSetBuilder::new();
50 for &id in &self.style_elements {
51 let element = &self[id];
52 if let Some(type_attr) = element.as_element().unwrap().get_attr(&local_name!("type")) {
54 if !type_attr.eq_ignore_ascii_case("text/css") {
55 continue;
56 }
57 }
58 style_set.add_stylesheet(&self.child_text_content(id))
59 }
60 style_set.finish()
61 }
62
63 pub fn html_link_elements(&self) -> impl Iterator<Item = (&str, &str)> {
65 self.nodes()
66 .filter_map(move |node| self[node].as_element())
67 .filter(|e| e.name.expanded() == expanded_name!(html "link"))
68 .filter_map(|e| {
69 match (
70 e.get_attr(&local_name!("rel")),
71 e.get_attr(&local_name!("href")),
72 ) {
73 (Some(rel), Some(href)) => Some((rel, href)),
74 _ => None,
75 }
76 })
77 }
78
79 pub fn root_element(&self) -> NodeId {
80 let document_node = &self[Document::document_node_id()];
81 let mut root = None;
82
83 if matches!(document_node.data, NodeData::Document)
84 && document_node.parent.is_none()
85 && document_node.next_sibling.is_none()
86 && document_node.previous_sibling.is_none()
87 {
88 for child in self.node_and_following_siblings(document_node.first_child.unwrap()) {
89 match &self[child].data {
90 NodeData::Doctype { .. }
91 | NodeData::Comment { .. }
92 | NodeData::ProcessingInstruction { .. } => {}
93 NodeData::Document | NodeData::Text { .. } => {
94 println!("Unexpected node type under document node")
95 }
96 NodeData::Element(_) => {
97 if root.is_none() {
98 root = Some(child)
99 }
100 }
101 }
102 }
103 }
104
105 root.unwrap()
106 }
107
108 fn push_node(&mut self, mut node: Node) -> NodeId {
109 let next_index = self.nodes.len();
110 let id = NodeId(std::num::NonZeroUsize::new(next_index).unwrap());
111 let _ = node.node_id.insert(id);
112 self.nodes.push(node);
113
114 id
115 }
116
117 fn detach(&mut self, node: NodeId) {
118 let (parent, previous_sibling, next_sibling) = {
119 let node = &mut self[node];
120 (
121 node.parent.take(),
122 node.previous_sibling.take(),
123 node.next_sibling.take(),
124 )
125 };
126
127 if let Some(next_sibling) = next_sibling {
128 self[next_sibling].previous_sibling = previous_sibling
129 } else if let Some(parent) = parent {
130 self[parent].last_child = previous_sibling;
131 }
132
133 if let Some(previous_sibling) = previous_sibling {
134 self[previous_sibling].next_sibling = next_sibling;
135 } else if let Some(parent) = parent {
136 self[parent].first_child = next_sibling;
137 }
138 }
139
140 fn append(&mut self, parent: NodeId, new_child: NodeId) {
141 self.detach(new_child);
142 self[new_child].parent = Some(parent);
143 if let Some(last_child) = self[parent].last_child.take() {
144 self[new_child].previous_sibling = Some(last_child);
145 debug_assert!(self[last_child].next_sibling.is_none());
146 self[last_child].next_sibling = Some(new_child);
147 } else {
148 debug_assert!(self[parent].first_child.is_none());
149 self[parent].first_child = Some(new_child);
150 }
151 self[parent].last_child = Some(new_child);
152 }
153
154 fn insert_before(&mut self, sibling: NodeId, new_sibling: NodeId) {
155 self.detach(new_sibling);
156 self[new_sibling].parent = self[sibling].parent;
157 self[new_sibling].next_sibling = Some(sibling);
158 if let Some(previous_sibling) = self[sibling].previous_sibling.take() {
159 self[new_sibling].previous_sibling = Some(previous_sibling);
160 debug_assert_eq!(self[previous_sibling].next_sibling, Some(sibling));
161 self[previous_sibling].next_sibling = Some(new_sibling);
162 } else if let Some(parent) = self[sibling].parent {
163 debug_assert_eq!(self[parent].first_child, Some(sibling));
164 self[parent].first_child = Some(new_sibling);
165 }
166 self[sibling].previous_sibling = Some(new_sibling);
167 }
168
169 pub fn child_text_content(&self, node: NodeId) -> Cow<String> {
171 let mut link = self[node].first_child;
172 let mut text = None;
173 while let Some(child) = link {
174 if let NodeData::Text { contents } = &self[child].data {
175 match &mut text {
176 None => text = Some(Cow::Borrowed(contents)),
177 Some(text) => text.to_mut().push_str(&contents),
178 }
179 }
180 link = self[child].next_sibling;
181 }
182 text.unwrap_or_else(|| Cow::Owned(String::new()))
183 }
184
185 pub fn node_and_following_siblings<'a>(
186 &'a self,
187 node: NodeId,
188 ) -> impl Iterator<Item = NodeId> + 'a {
189 successors(Some(node), move |&node| self[node].next_sibling)
190 }
191
192 pub fn node_and_ancestors<'a>(&'a self, node: NodeId) -> impl Iterator<Item = NodeId> + 'a {
193 successors(Some(node), move |&node| self[node].parent)
194 }
195
196 fn next_in_tree_order(&self, node: NodeId) -> Option<NodeId> {
197 self[node].first_child.or_else(|| {
198 self.node_and_ancestors(node)
199 .find_map(|ancestor| self[ancestor].next_sibling)
200 })
201 }
202
203 pub fn nodes<'a>(&'a self) -> impl Iterator<Item = NodeId> + 'a {
204 let root = Self::document_node_id();
205 successors(Some(root), move |&node| self.next_in_tree_order(node))
206 }
207}
208
209impl std::ops::Index<NodeId> for Document {
210 type Output = Node;
211
212 #[inline]
213 fn index(&self, id: NodeId) -> &Node {
214 &self.nodes[id.0.get()]
215 }
216}
217
218impl std::ops::IndexMut<NodeId> for Document {
219 #[inline]
220 fn index_mut(&mut self, id: NodeId) -> &mut Node {
221 &mut self.nodes[id.0.get()]
222 }
223}
224
225pub enum NodeData {
226 Document,
227 Doctype {
228 _name: String,
229 _public_id: String,
230 _system_id: String,
231 },
232 Text {
233 contents: String,
234 },
235 Comment {
236 _contents: String,
237 },
238 Element(ElementData),
239 ProcessingInstruction {
240 _target: String,
241 _contents: String,
242 },
243}
244
245#[derive(Debug)]
246pub struct ElementData {
247 pub name: QualName,
248 pub attrs: Vec<Attribute>,
249 pub mathml_annotation_xml_integration_point: bool,
250 pub layout_data: atomic_refcell::AtomicRefCell<crate::layout::LayoutDataForElement>,
251 pub css_local_name: CssLocalName,
252}
253
254#[derive(Debug)]
255pub struct Attribute {
256 pub name: QualName,
258 pub value: String,
260}
261
262impl ElementData {
263 pub fn get_attr(&self, name: &LocalName) -> Option<&str> {
264 self.attrs
265 .iter()
266 .find(|attr| attr.name.ns == ns!() && attr.name.local == *name)
267 .map(|attr| &*attr.value)
268 }
269}
270
/// Pins the in-memory size of the core node types on 64-bit targets so that
/// accidental growth is caught by the test suite.
#[test]
#[cfg(target_pointer_width = "64")]
fn size_of() {
    use std::mem;

    assert_eq!(mem::size_of::<Node>(), 144);
    assert_eq!(mem::size_of::<NodeData>(), 96);
    assert_eq!(mem::size_of::<ElementData>(), 96);
}
279
280impl Node {
281 pub fn in_html_document(&self) -> bool {
282 true
284 }
285
286 pub fn as_element(&self) -> Option<&ElementData> {
287 match self.data {
288 NodeData::Element(ref data) => Some(data),
289 _ => None,
290 }
291 }
292
293 pub fn html(&self) -> Option<&String> {
294 None
295 }
296
297 pub fn matches(
298 &self,
299 selector: &crate::style::selectors::Selector,
300 document: &Document,
301 element: NodeId,
302 ) -> bool {
303 crate::style::selectors::matches(selector, document, element)
304 }
305
306 pub fn as_text(&self) -> Option<&String> {
307 match self.data {
308 NodeData::Text { ref contents } => Some(contents),
309 _ => None,
310 }
311 }
312
313 fn new(data: NodeData) -> Self {
314 Node {
315 parent: None,
316 previous_sibling: None,
317 next_sibling: None,
318 first_child: None,
319 last_child: None,
320 data: data,
321 node_id: None,
322 }
323 }
324}
325
326impl fmt::Debug for Node {
327 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
328 let ptr: *const Node = self;
329 f.debug_tuple("Node").field(&ptr).finish()
330 }
331}