1use std::borrow::Cow;
2use std::cmp::Ordering;
3use std::fmt;
4use std::hash::{Hash, Hasher};
5use std::iter::{Enumerate, from_fn};
6use std::num::NonZeroU32;
7use std::ops::Range;
8use std::slice::Iter;
9
10use crate::{
11 Document, Name, NameData,
12 error::{ErrorKind, Result},
13 strings::StringData,
14};
15
16impl<'input> Document<'input> {
17 pub fn root<'doc>(&'doc self) -> Node<'doc, 'input> {
18 self.node(NodeId::new(0).unwrap()).unwrap()
19 }
20
21 pub fn root_element<'doc>(&'doc self) -> Node<'doc, 'input> {
22 self.root().first_child_element().unwrap()
23 }
24
25 pub fn node<'doc>(&'doc self, id: NodeId) -> Option<Node<'doc, 'input>> {
26 self.nodes.get(id.get()).map(|data| Node {
27 id,
28 data,
29 doc: self,
30 })
31 }
32}
33
34#[derive(Clone, Copy)]
35pub struct Node<'doc, 'input> {
36 pub(crate) id: NodeId,
37 pub(crate) data: &'doc NodeData,
38 pub(crate) doc: &'doc Document<'input>,
39}
40
41impl PartialEq for Node<'_, '_> {
42 fn eq(&self, other: &Self) -> bool {
43 let doc = self.doc as *const Document;
44 let other_doc = other.doc as *const Document;
45
46 (self.id, doc) == (other.id, other_doc)
47 }
48}
49
50impl Eq for Node<'_, '_> {}
51
52impl PartialOrd for Node<'_, '_> {
53 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
54 Some(self.cmp(other))
55 }
56}
57
58impl Ord for Node<'_, '_> {
59 fn cmp(&self, other: &Self) -> Ordering {
60 let doc = self.doc as *const Document;
61 let other_doc = other.doc as *const Document;
62
63 (self.id, doc).cmp(&(other.id, other_doc))
64 }
65}
66
67impl Hash for Node<'_, '_> {
68 fn hash<H>(&self, hasher: &mut H)
69 where
70 H: Hasher,
71 {
72 let doc = self.doc as *const Document;
73
74 (self.id, doc).hash(hasher);
75 }
76}
77
78impl<'doc, 'input> Node<'doc, 'input> {
79 pub fn document(self) -> &'doc Document<'input> {
80 self.doc
81 }
82
83 pub fn id(self) -> NodeId {
84 self.id
85 }
86
87 pub fn is_root(&self) -> bool {
88 self.data.element.is_none() && self.data.text.is_none()
89 }
90
91 pub fn is_element(&self) -> bool {
92 self.data.element.is_some()
93 }
94
95 pub(crate) fn element_data(self) -> Option<&'doc ElementData<'input>> {
96 self.data
97 .element
98 .map(|element| &self.doc.elements[element.get()])
99 }
100
101 pub fn is_text(&self) -> bool {
102 self.data.text.is_some()
103 }
104
105 pub(crate) fn text_data(self) -> Option<&'doc StringData<'input>> {
106 self.data.text.map(|text| &self.doc.texts[text.get()])
107 }
108
109 fn other(self, id: NodeId) -> Self {
110 self.doc.node(id).unwrap()
111 }
112
113 fn iter<F>(self, f: F) -> impl Iterator<Item = Self> + Clone
114 where
115 F: Fn(Self) -> Option<Self> + Clone,
116 {
117 let mut next = Some(self);
118
119 from_fn(move || match next {
120 Some(next1) => {
121 next = f(next1);
122
123 Some(next1)
124 }
125 None => None,
126 })
127 }
128
129 pub fn parent(self) -> Option<Self> {
130 self.data.parent.map(|id| self.other(id))
131 }
132
133 pub fn ancestors(self) -> impl Iterator<Item = Self> + Clone {
134 self.iter(Self::parent)
135 }
136
137 pub fn prev_sibling(self) -> Option<Self> {
138 self.data.prev_sibling.map(|id| self.other(id))
139 }
140
141 pub fn prev_siblings(self) -> impl Iterator<Item = Self> + Clone {
142 self.iter(Self::prev_sibling)
143 }
144
145 pub fn prev_sibling_element(self) -> Option<Self> {
146 self.prev_siblings().find(Self::is_element)
147 }
148
149 pub fn next_sibling(self) -> Option<Self> {
150 self.data
151 .next_subtree
152 .filter(|id| self.doc.nodes[id.get()].prev_sibling.unwrap() == self.id)
153 .map(|id| self.other(id))
154 }
155
156 pub fn next_siblings(self) -> impl Iterator<Item = Self> + Clone {
157 self.iter(Self::next_sibling)
158 }
159
160 pub fn next_sibling_element(self) -> Option<Self> {
161 self.next_siblings().find(Self::is_element)
162 }
163
164 pub fn has_children(self) -> bool {
165 self.data.last_child.is_some()
166 }
167
168 pub fn first_child(self) -> Option<Self> {
169 if self.has_children() {
170 Some(self.other(self.id.next()))
171 } else {
172 None
173 }
174 }
175
176 pub fn first_child_element(self) -> Option<Self> {
177 self.child_elements().next()
178 }
179
180 pub fn last_child(self) -> Option<Self> {
181 self.data.last_child.map(|id| self.other(id))
182 }
183
184 pub fn last_child_element(self) -> Option<Self> {
185 self.child_elements().next_back()
186 }
187
188 pub fn children(self) -> Children<'doc, 'input> {
189 Children {
190 front: self.first_child(),
191 back: self.last_child(),
192 }
193 }
194
195 pub fn child_elements(self) -> impl DoubleEndedIterator<Item = Self> + Clone {
196 self.children().filter(Self::is_element)
197 }
198
199 pub fn descendants(self) -> Descendants<'doc, 'input> {
216 let from = self.id.get();
217
218 let until = self
219 .data
220 .next_subtree
221 .map_or(self.doc.nodes.len(), |id| id.get());
222
223 let nodes = self.doc.nodes[from..until].iter().enumerate();
224
225 Descendants {
226 from,
227 nodes,
228 doc: self.doc,
229 }
230 }
231
232 pub fn name(self) -> Option<Name<'doc, 'input>> {
233 self.element_data()
234 .map(|element| element.name.get(self.doc))
235 }
236
237 pub fn has_name<N>(self, name: N) -> bool
238 where
239 Name<'doc, 'input>: PartialEq<N>,
240 {
241 self.name().is_some_and(|name1| name1 == name)
242 }
243
244 pub fn text(self) -> Option<&'doc str> {
245 self.text_data().map(AsRef::as_ref)
246 }
247
248 pub fn child_texts(self) -> impl Iterator<Item = &'doc str> + Clone {
249 self.children().filter_map(Self::text)
250 }
251
252 pub fn child_text(self) -> Option<Cow<'doc, str>> {
261 collect_text(self.child_texts())
262 }
263
264 pub fn descedant_texts(self) -> impl Iterator<Item = &'doc str> + Clone {
265 self.descendants().filter_map(Self::text)
266 }
267
268 pub fn descedant_text(self) -> Option<Cow<'doc, str>> {
277 collect_text(self.descedant_texts())
278 }
279}
280
281fn collect_text<'doc>(mut iter: impl Iterator<Item = &'doc str> + Clone) -> Option<Cow<'doc, str>> {
282 let mut cnt = 0;
283 let mut len = 0;
284
285 for text in iter.clone() {
286 cnt += 1;
287 len += text.len();
288 }
289
290 if cnt == 0 {
291 return None;
292 } else if cnt == 1 {
293 let text = iter.next().unwrap();
294
295 return Some(Cow::Borrowed(text));
296 }
297
298 let mut buf = String::with_capacity(len);
299
300 for text in iter {
301 buf.push_str(text);
302 }
303
304 Some(Cow::Owned(buf))
305}
306
307#[derive(Debug)]
308pub(crate) struct NodeData {
309 pub(crate) element: Option<NodeId>,
310 pub(crate) text: Option<NodeId>,
311 pub(crate) parent: Option<NodeId>,
312 pub(crate) prev_sibling: Option<NodeId>,
313 pub(crate) next_subtree: Option<NodeId>,
314 pub(crate) last_child: Option<NodeId>,
315}
316
317const _SIZE_OF_NODE_DATA: () = assert!(size_of::<NodeData>() == 3 * size_of::<usize>());
318
319#[derive(Debug)]
320pub(crate) struct ElementData<'input> {
321 pub(crate) name: NameData<'input>,
322 pub(crate) attributes: Range<u32>,
323}
324
325const _SIZE_OF_ELEMENT_DATA: () =
326 assert!(size_of::<ElementData<'static>>() == (3 + 1) * size_of::<usize>());
327
328#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
329pub struct NodeId(NonZeroU32);
330
331impl NodeId {
332 pub fn new(id: usize) -> Result<Self> {
333 if id >= u32::MAX as usize {
334 return ErrorKind::TooManyNodes.into();
335 }
336
337 Ok(Self(NonZeroU32::new(id as u32 + 1).unwrap()))
338 }
339
340 pub fn get(self) -> usize {
341 self.0.get() as usize - 1
342 }
343
344 pub fn next(self) -> Self {
345 Self(self.0.checked_add(1).unwrap())
346 }
347}
348
349#[derive(Clone)]
350pub struct Children<'doc, 'input> {
351 front: Option<Node<'doc, 'input>>,
352 back: Option<Node<'doc, 'input>>,
353}
354
355impl<'doc, 'input> Iterator for Children<'doc, 'input> {
356 type Item = Node<'doc, 'input>;
357
358 fn next(&mut self) -> Option<Self::Item> {
359 let is_last = self.front == self.back;
360
361 let node = self.front.take();
362
363 if is_last {
364 self.back = None;
365 } else {
366 self.front = node.and_then(Node::next_sibling);
367 }
368
369 node
370 }
371}
372
373impl DoubleEndedIterator for Children<'_, '_> {
374 fn next_back(&mut self) -> Option<Self::Item> {
375 let is_last = self.front == self.back;
376
377 let node = self.back.take();
378
379 if is_last {
380 self.front = None;
381 } else {
382 self.back = node.and_then(Node::prev_sibling);
383 }
384
385 node
386 }
387}
388
389#[derive(Clone)]
390pub struct Descendants<'doc, 'input> {
391 from: usize,
392 nodes: Enumerate<Iter<'doc, NodeData>>,
393 doc: &'doc Document<'input>,
394}
395
396impl<'doc, 'input> Iterator for Descendants<'doc, 'input> {
397 type Item = Node<'doc, 'input>;
398
399 fn next(&mut self) -> Option<Self::Item> {
400 self.nodes.next().map(|(idx, data)| Node {
401 id: NodeId::new(self.from + idx).unwrap(),
402 data,
403 doc: self.doc,
404 })
405 }
406
407 fn nth(&mut self, n: usize) -> Option<Self::Item> {
408 self.nodes.nth(n).map(|(idx, data)| Node {
409 id: NodeId::new(self.from + idx).unwrap(),
410 data,
411 doc: self.doc,
412 })
413 }
414
415 fn size_hint(&self) -> (usize, Option<usize>) {
416 self.nodes.size_hint()
417 }
418}
419
420impl ExactSizeIterator for Descendants<'_, '_> {}
421
422impl DoubleEndedIterator for Descendants<'_, '_> {
423 fn next_back(&mut self) -> Option<Self::Item> {
424 self.nodes.next_back().map(|(idx, data)| Node {
425 id: NodeId::new(self.from + idx).unwrap(),
426 data,
427 doc: self.doc,
428 })
429 }
430}
431
432impl fmt::Debug for Node<'_, '_> {
433 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
434 let mut fmt = fmt.debug_struct("Node");
435
436 if let Some(name) = self.name() {
437 fmt.field("name", &name);
438 }
439
440 if let Some(text) = self.text() {
441 fmt.field("text", &text);
442 }
443
444 if self.has_attributes() {
445 fmt.field("attributes", &self.attributes());
446 }
447
448 if self.has_children() {
449 fmt.field("children", &self.children());
450 }
451
452 fmt.finish()
453 }
454}
455
456impl fmt::Debug for Children<'_, '_> {
457 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
458 fmt.debug_list().entries(self.clone()).finish()
459 }
460}
461
462impl fmt::Debug for Descendants<'_, '_> {
463 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
464 fmt.debug_list().entries(self.clone()).finish()
465 }
466}