robinson/
nodes.rs

1use std::borrow::Cow;
2use std::cmp::Ordering;
3use std::fmt;
4use std::hash::{Hash, Hasher};
5use std::iter::{Enumerate, from_fn};
6use std::num::NonZeroU32;
7use std::slice::Iter;
8
9use crate::{
10    Document, Name, NameData,
11    error::{ErrorKind, Result},
12};
13
14impl<'input> Document<'input> {
15    pub fn root<'doc>(&'doc self) -> Node<'doc, 'input> {
16        self.node(NodeId::ROOT).unwrap()
17    }
18
19    pub fn root_element<'doc>(&'doc self) -> Node<'doc, 'input> {
20        self.root().first_child_element().unwrap()
21    }
22
23    pub fn node<'doc>(&'doc self, id: NodeId) -> Option<Node<'doc, 'input>> {
24        self.nodes.get(id.get()).map(|data| Node {
25            id,
26            data,
27            doc: self,
28        })
29    }
30}
31
32#[derive(Clone, Copy)]
33pub struct Node<'doc, 'input> {
34    pub(crate) id: NodeId,
35    pub(crate) data: &'doc NodeData,
36    pub(crate) doc: &'doc Document<'input>,
37}
38
39impl PartialEq for Node<'_, '_> {
40    fn eq(&self, other: &Self) -> bool {
41        let doc = self.doc as *const Document;
42        let other_doc = other.doc as *const Document;
43
44        (self.id, doc) == (other.id, other_doc)
45    }
46}
47
48impl Eq for Node<'_, '_> {}
49
50impl PartialOrd for Node<'_, '_> {
51    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
52        Some(self.cmp(other))
53    }
54}
55
56impl Ord for Node<'_, '_> {
57    fn cmp(&self, other: &Self) -> Ordering {
58        let doc = self.doc as *const Document;
59        let other_doc = other.doc as *const Document;
60
61        (self.id, doc).cmp(&(other.id, other_doc))
62    }
63}
64
65impl Hash for Node<'_, '_> {
66    fn hash<H>(&self, hasher: &mut H)
67    where
68        H: Hasher,
69    {
70        let doc = self.doc as *const Document;
71
72        (self.id, doc).hash(hasher);
73    }
74}
75
76impl<'doc, 'input> Node<'doc, 'input> {
77    pub fn document(self) -> &'doc Document<'input> {
78        self.doc
79    }
80
81    pub fn id(self) -> NodeId {
82        self.id
83    }
84
85    pub fn is_root(&self) -> bool {
86        self.id == NodeId::ROOT
87    }
88
89    pub fn is_element(&self) -> bool {
90        self.data.element.is_some()
91    }
92
93    pub(crate) fn element_data(self) -> Option<&'doc ElementData<'input>> {
94        self.data
95            .element
96            .map(|element| &self.doc.elements[element.get()])
97    }
98
99    pub fn is_text(&self) -> bool {
100        self.data.text.is_some()
101    }
102
103    fn other(self, id: NodeId) -> Self {
104        self.doc.node(id).unwrap()
105    }
106
107    fn iter<F>(self, f: F) -> impl Iterator<Item = Self> + Clone
108    where
109        F: Fn(Self) -> Option<Self> + Clone,
110    {
111        let mut next = Some(self);
112
113        from_fn(move || match next {
114            Some(next1) => {
115                next = f(next1);
116
117                Some(next1)
118            }
119            None => None,
120        })
121    }
122
123    pub fn parent(self) -> Option<Self> {
124        self.data.parent.map(|id| self.other(id))
125    }
126
127    pub fn ancestors(self) -> impl Iterator<Item = Self> + Clone {
128        self.iter(Self::parent)
129    }
130
131    pub fn prev_sibling(self) -> Option<Self> {
132        self.data.prev_sibling.map(|id| self.other(id))
133    }
134
135    pub fn prev_siblings(self) -> impl Iterator<Item = Self> + Clone {
136        self.iter(Self::prev_sibling)
137    }
138
139    pub fn prev_sibling_element(self) -> Option<Self> {
140        self.prev_siblings().find(Self::is_element)
141    }
142
143    pub fn next_sibling(self) -> Option<Self> {
144        self.data
145            .next_subtree
146            .filter(|id| self.doc.nodes[id.get()].prev_sibling.unwrap() == self.id)
147            .map(|id| self.other(id))
148    }
149
150    pub fn next_siblings(self) -> impl Iterator<Item = Self> + Clone {
151        self.iter(Self::next_sibling)
152    }
153
154    pub fn next_sibling_element(self) -> Option<Self> {
155        self.next_siblings().find(Self::is_element)
156    }
157
158    pub fn has_children(self) -> bool {
159        self.data.last_child.is_some()
160    }
161
162    pub fn first_child(self) -> Option<Self> {
163        if self.has_children() {
164            Some(self.other(self.id.next()))
165        } else {
166            None
167        }
168    }
169
170    pub fn first_child_element(self) -> Option<Self> {
171        self.child_elements().next()
172    }
173
174    pub fn last_child(self) -> Option<Self> {
175        self.data.last_child.map(|id| self.other(id))
176    }
177
178    pub fn last_child_element(self) -> Option<Self> {
179        self.child_elements().next_back()
180    }
181
182    pub fn children(self) -> Children<'doc, 'input> {
183        Children {
184            front: self.first_child(),
185            back: self.last_child(),
186        }
187    }
188
189    pub fn child_elements(self) -> impl DoubleEndedIterator<Item = Self> + Clone {
190        self.children().filter(Self::is_element)
191    }
192
193    /// ```
194    /// # use robinson::{Document, Name};
195    /// let doc = Document::parse(r#"<root><parent><child/></parent></root>"#).unwrap();
196    ///
197    /// let mut nodes = doc.root_element().descendants();
198    ///
199    /// let node = nodes.next().unwrap();
200    /// assert_eq!(node.name(), Some(Name { namespace: None, local: "root" }));
201    ///
202    /// let node = nodes.next().unwrap();
203    /// assert_eq!(node.name(), Some(Name { namespace: None, local: "parent" }));
204    ///
205    /// let node = nodes.next().unwrap();
206    /// assert_eq!(node.name(), Some(Name { namespace: None, local: "child" }));
207    ///
208    /// assert_eq!(nodes.next(), None);
209    pub fn descendants(self) -> Descendants<'doc, 'input> {
210        let from = self.id.get();
211
212        let until = self
213            .data
214            .next_subtree
215            .map_or(self.doc.nodes.len(), |id| id.get());
216
217        let nodes = self.doc.nodes[from..until].iter().enumerate();
218
219        Descendants {
220            from,
221            nodes,
222            doc: self.doc,
223        }
224    }
225
226    pub fn name(self) -> Option<Name<'doc, 'input>> {
227        self.element_data()
228            .map(|element| element.name.get(self.doc))
229    }
230
231    pub fn has_name<N>(self, name: N) -> bool
232    where
233        Name<'doc, 'input>: PartialEq<N>,
234    {
235        self.name().is_some_and(|name1| name1 == name)
236    }
237
238    pub fn text(self) -> Option<&'doc str> {
239        self.data.text.map(|text| self.doc.strings.get(text))
240    }
241
242    pub fn child_texts(self) -> impl Iterator<Item = &'doc str> + Clone {
243        self.children().filter_map(Self::text)
244    }
245
246    /// ```
247    /// # use std::borrow::Cow;
248    /// # use robinson::Document;
249    /// let doc = Document::parse(r#"<root>foo<child>bar</child>baz</root>"#).unwrap();
250    ///
251    /// let text = doc.root_element().child_text();
252    ///
253    /// assert_eq!(text, Some(Cow::Owned("foobaz".to_owned())));
254    pub fn child_text(self) -> Option<Cow<'doc, str>> {
255        collect_text(self.child_texts())
256    }
257
258    pub fn descedant_texts(self) -> impl Iterator<Item = &'doc str> + Clone {
259        self.descendants().filter_map(Self::text)
260    }
261
262    /// ```
263    /// # use std::borrow::Cow;
264    /// # use robinson::Document;
265    /// let doc = Document::parse(r#"<root>foo<child>bar</child>baz</root>"#).unwrap();
266    ///
267    /// let text = doc.root_element().descedant_text();
268    ///
269    /// assert_eq!(text, Some(Cow::Owned("foobarbaz".to_owned())));
270    pub fn descedant_text(self) -> Option<Cow<'doc, str>> {
271        collect_text(self.descedant_texts())
272    }
273}
274
275fn collect_text<'doc>(mut iter: impl Iterator<Item = &'doc str> + Clone) -> Option<Cow<'doc, str>> {
276    let mut cnt = 0;
277    let mut len = 0;
278
279    for text in iter.clone() {
280        cnt += 1;
281        len += text.len();
282    }
283
284    if cnt == 0 {
285        return None;
286    } else if cnt == 1 {
287        let text = iter.next().unwrap();
288
289        return Some(Cow::Borrowed(text));
290    }
291
292    let mut buf = String::with_capacity(len);
293
294    for text in iter {
295        buf.push_str(text);
296    }
297
298    Some(Cow::Owned(buf))
299}
300
301pub(crate) struct NodeData {
302    pub(crate) element: Option<NodeId>,
303    pub(crate) text: Option<NodeId>,
304    pub(crate) parent: Option<NodeId>,
305    pub(crate) prev_sibling: Option<NodeId>,
306    pub(crate) next_subtree: Option<NodeId>,
307    pub(crate) last_child: Option<NodeId>,
308}
309
310const _SIZE_OF_NODE_DATA: () = assert!(size_of::<NodeData>() == 3 * size_of::<usize>());
311
312#[repr(Rust, packed)]
313pub(crate) struct ElementData<'input> {
314    pub(crate) name: NameData<'input>,
315    pub(crate) attributes_start: u32,
316    pub(crate) attributes_end: u32,
317}
318
319const _SIZE_OF_ELEMENT_DATA: () = assert!(
320    size_of::<ElementData<'static>>()
321        == size_of::<u16>() + 2 * size_of::<usize>() + 2 * size_of::<u32>()
322);
323
324#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
325pub struct NodeId(NonZeroU32);
326
327impl NodeId {
328    pub(crate) const ROOT: Self = Self(NonZeroU32::new(1).unwrap());
329
330    pub(crate) fn new(id: usize) -> Result<Self> {
331        if id >= u32::MAX as usize {
332            return ErrorKind::TooManyNodes.into();
333        }
334
335        Ok(Self(NonZeroU32::new(id as u32 + 1).unwrap()))
336    }
337
338    pub(crate) fn get(self) -> usize {
339        self.0.get() as usize - 1
340    }
341
342    fn next(self) -> Self {
343        Self(self.0.checked_add(1).unwrap())
344    }
345}
346
347#[derive(Clone)]
348pub struct Children<'doc, 'input> {
349    front: Option<Node<'doc, 'input>>,
350    back: Option<Node<'doc, 'input>>,
351}
352
353impl<'doc, 'input> Iterator for Children<'doc, 'input> {
354    type Item = Node<'doc, 'input>;
355
356    fn next(&mut self) -> Option<Self::Item> {
357        let is_last = self.front == self.back;
358
359        let node = self.front.take();
360
361        if is_last {
362            self.back = None;
363        } else {
364            self.front = node.and_then(Node::next_sibling);
365        }
366
367        node
368    }
369}
370
371impl DoubleEndedIterator for Children<'_, '_> {
372    fn next_back(&mut self) -> Option<Self::Item> {
373        let is_last = self.front == self.back;
374
375        let node = self.back.take();
376
377        if is_last {
378            self.front = None;
379        } else {
380            self.back = node.and_then(Node::prev_sibling);
381        }
382
383        node
384    }
385}
386
387#[derive(Clone)]
388pub struct Descendants<'doc, 'input> {
389    from: usize,
390    nodes: Enumerate<Iter<'doc, NodeData>>,
391    doc: &'doc Document<'input>,
392}
393
394impl<'doc, 'input> Descendants<'doc, 'input> {
395    fn get(&self, (idx, data): (usize, &'doc NodeData)) -> Node<'doc, 'input> {
396        Node {
397            id: NodeId::new(self.from + idx).unwrap(),
398            data,
399            doc: self.doc,
400        }
401    }
402}
403
404impl<'doc, 'input> Iterator for Descendants<'doc, 'input> {
405    type Item = Node<'doc, 'input>;
406
407    fn next(&mut self) -> Option<Self::Item> {
408        self.nodes.next().map(|node| self.get(node))
409    }
410
411    fn nth(&mut self, n: usize) -> Option<Self::Item> {
412        self.nodes.nth(n).map(|node| self.get(node))
413    }
414
415    fn size_hint(&self) -> (usize, Option<usize>) {
416        self.nodes.size_hint()
417    }
418}
419
420impl ExactSizeIterator for Descendants<'_, '_> {}
421
422impl DoubleEndedIterator for Descendants<'_, '_> {
423    fn next_back(&mut self) -> Option<Self::Item> {
424        self.nodes.next_back().map(|node| self.get(node))
425    }
426}
427
428impl fmt::Debug for Node<'_, '_> {
429    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
430        let mut fmt = fmt.debug_struct("Node");
431
432        if let Some(name) = self.name() {
433            fmt.field("name", &name);
434        }
435
436        if let Some(text) = self.text() {
437            fmt.field("text", &text);
438        }
439
440        if self.has_attributes() {
441            fmt.field("attributes", &self.attributes());
442        }
443
444        if self.has_children() {
445            fmt.field("children", &self.children());
446        }
447
448        fmt.finish()
449    }
450}
451
452impl fmt::Debug for Children<'_, '_> {
453    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
454        fmt.debug_list().entries(self.clone()).finish()
455    }
456}
457
458impl fmt::Debug for Descendants<'_, '_> {
459    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
460        fmt.debug_list().entries(self.clone()).finish()
461    }
462}