robinson/
nodes.rs

1use std::borrow::Cow;
2use std::cmp::Ordering;
3use std::fmt;
4use std::hash::{Hash, Hasher};
5use std::iter::{Enumerate, from_fn};
6use std::num::NonZeroU32;
7use std::ops::Range;
8use std::slice::Iter;
9
10use crate::{
11    Document, Name, NameData,
12    error::{ErrorKind, Result},
13    strings::StringData,
14};
15
16impl<'input> Document<'input> {
17    pub fn root<'doc>(&'doc self) -> Node<'doc, 'input> {
18        self.node(NodeId::new(0).unwrap()).unwrap()
19    }
20
21    pub fn root_element<'doc>(&'doc self) -> Node<'doc, 'input> {
22        self.root().first_child_element().unwrap()
23    }
24
25    pub fn node<'doc>(&'doc self, id: NodeId) -> Option<Node<'doc, 'input>> {
26        self.nodes.get(id.get()).map(|data| Node {
27            id,
28            data,
29            doc: self,
30        })
31    }
32}
33
34#[derive(Clone, Copy)]
35pub struct Node<'doc, 'input> {
36    pub(crate) id: NodeId,
37    pub(crate) data: &'doc NodeData,
38    pub(crate) doc: &'doc Document<'input>,
39}
40
41impl PartialEq for Node<'_, '_> {
42    fn eq(&self, other: &Self) -> bool {
43        let doc = self.doc as *const Document;
44        let other_doc = other.doc as *const Document;
45
46        (self.id, doc) == (other.id, other_doc)
47    }
48}
49
50impl Eq for Node<'_, '_> {}
51
52impl PartialOrd for Node<'_, '_> {
53    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
54        Some(self.cmp(other))
55    }
56}
57
58impl Ord for Node<'_, '_> {
59    fn cmp(&self, other: &Self) -> Ordering {
60        let doc = self.doc as *const Document;
61        let other_doc = other.doc as *const Document;
62
63        (self.id, doc).cmp(&(other.id, other_doc))
64    }
65}
66
67impl Hash for Node<'_, '_> {
68    fn hash<H>(&self, hasher: &mut H)
69    where
70        H: Hasher,
71    {
72        let doc = self.doc as *const Document;
73
74        (self.id, doc).hash(hasher);
75    }
76}
77
78impl<'doc, 'input> Node<'doc, 'input> {
79    pub fn document(self) -> &'doc Document<'input> {
80        self.doc
81    }
82
83    pub fn id(self) -> NodeId {
84        self.id
85    }
86
87    pub fn is_root(&self) -> bool {
88        self.data.element.is_none() && self.data.text.is_none()
89    }
90
91    pub fn is_element(&self) -> bool {
92        self.data.element.is_some()
93    }
94
95    pub(crate) fn element_data(self) -> Option<&'doc ElementData<'input>> {
96        self.data
97            .element
98            .map(|element| &self.doc.elements[element.get()])
99    }
100
101    pub fn is_text(&self) -> bool {
102        self.data.text.is_some()
103    }
104
105    pub(crate) fn text_data(self) -> Option<&'doc StringData<'input>> {
106        self.data.text.map(|text| &self.doc.texts[text.get()])
107    }
108
109    fn other(self, id: NodeId) -> Self {
110        self.doc.node(id).unwrap()
111    }
112
113    fn iter<F>(self, f: F) -> impl Iterator<Item = Self> + Clone
114    where
115        F: Fn(Self) -> Option<Self> + Clone,
116    {
117        let mut next = Some(self);
118
119        from_fn(move || match next {
120            Some(next1) => {
121                next = f(next1);
122
123                Some(next1)
124            }
125            None => None,
126        })
127    }
128
129    pub fn parent(self) -> Option<Self> {
130        self.data.parent.map(|id| self.other(id))
131    }
132
133    pub fn ancestors(self) -> impl Iterator<Item = Self> + Clone {
134        self.iter(Self::parent)
135    }
136
137    pub fn prev_sibling(self) -> Option<Self> {
138        self.data.prev_sibling.map(|id| self.other(id))
139    }
140
141    pub fn prev_siblings(self) -> impl Iterator<Item = Self> + Clone {
142        self.iter(Self::prev_sibling)
143    }
144
145    pub fn prev_sibling_element(self) -> Option<Self> {
146        self.prev_siblings().find(Self::is_element)
147    }
148
149    pub fn next_sibling(self) -> Option<Self> {
150        self.data
151            .next_subtree
152            .filter(|id| self.doc.nodes[id.get()].prev_sibling.unwrap() == self.id)
153            .map(|id| self.other(id))
154    }
155
156    pub fn next_siblings(self) -> impl Iterator<Item = Self> + Clone {
157        self.iter(Self::next_sibling)
158    }
159
160    pub fn next_sibling_element(self) -> Option<Self> {
161        self.next_siblings().find(Self::is_element)
162    }
163
164    pub fn has_children(self) -> bool {
165        self.data.last_child.is_some()
166    }
167
168    pub fn first_child(self) -> Option<Self> {
169        if self.has_children() {
170            Some(self.other(self.id.next()))
171        } else {
172            None
173        }
174    }
175
176    pub fn first_child_element(self) -> Option<Self> {
177        self.child_elements().next()
178    }
179
180    pub fn last_child(self) -> Option<Self> {
181        self.data.last_child.map(|id| self.other(id))
182    }
183
184    pub fn last_child_element(self) -> Option<Self> {
185        self.child_elements().next_back()
186    }
187
188    pub fn children(self) -> Children<'doc, 'input> {
189        Children {
190            front: self.first_child(),
191            back: self.last_child(),
192        }
193    }
194
195    pub fn child_elements(self) -> impl DoubleEndedIterator<Item = Self> + Clone {
196        self.children().filter(Self::is_element)
197    }
198
199    /// ```
200    /// # use robinson::{Document, Name};
201    /// let doc = Document::parse(r#"<root><parent><child/></parent></root>"#).unwrap();
202    ///
203    /// let mut nodes = doc.root_element().descendants();
204    ///
205    /// let node = nodes.next().unwrap();
206    /// assert_eq!(node.name(), Some(Name { namespace: None, local: "root" }));
207    ///
208    /// let node = nodes.next().unwrap();
209    /// assert_eq!(node.name(), Some(Name { namespace: None, local: "parent" }));
210    ///
211    /// let node = nodes.next().unwrap();
212    /// assert_eq!(node.name(), Some(Name { namespace: None, local: "child" }));
213    ///
214    /// assert_eq!(nodes.next(), None);
215    pub fn descendants(self) -> Descendants<'doc, 'input> {
216        let from = self.id.get();
217
218        let until = self
219            .data
220            .next_subtree
221            .map_or(self.doc.nodes.len(), |id| id.get());
222
223        let nodes = self.doc.nodes[from..until].iter().enumerate();
224
225        Descendants {
226            from,
227            nodes,
228            doc: self.doc,
229        }
230    }
231
232    pub fn name(self) -> Option<Name<'doc, 'input>> {
233        self.element_data()
234            .map(|element| element.name.get(self.doc))
235    }
236
237    pub fn has_name<N>(self, name: N) -> bool
238    where
239        Name<'doc, 'input>: PartialEq<N>,
240    {
241        self.name().is_some_and(|name1| name1 == name)
242    }
243
244    pub fn text(self) -> Option<&'doc str> {
245        self.text_data().map(AsRef::as_ref)
246    }
247
248    pub fn child_texts(self) -> impl Iterator<Item = &'doc str> + Clone {
249        self.children().filter_map(Self::text)
250    }
251
252    /// ```
253    /// # use std::borrow::Cow;
254    /// # use robinson::Document;
255    /// let doc = Document::parse(r#"<root>foo<child>bar</child>baz</root>"#).unwrap();
256    ///
257    /// let text = doc.root_element().child_text();
258    ///
259    /// assert_eq!(text, Some(Cow::Owned("foobaz".to_owned())));
260    pub fn child_text(self) -> Option<Cow<'doc, str>> {
261        collect_text(self.child_texts())
262    }
263
264    pub fn descedant_texts(self) -> impl Iterator<Item = &'doc str> + Clone {
265        self.descendants().filter_map(Self::text)
266    }
267
268    /// ```
269    /// # use std::borrow::Cow;
270    /// # use robinson::Document;
271    /// let doc = Document::parse(r#"<root>foo<child>bar</child>baz</root>"#).unwrap();
272    ///
273    /// let text = doc.root_element().descedant_text();
274    ///
275    /// assert_eq!(text, Some(Cow::Owned("foobarbaz".to_owned())));
276    pub fn descedant_text(self) -> Option<Cow<'doc, str>> {
277        collect_text(self.descedant_texts())
278    }
279}
280
281fn collect_text<'doc>(mut iter: impl Iterator<Item = &'doc str> + Clone) -> Option<Cow<'doc, str>> {
282    let mut cnt = 0;
283    let mut len = 0;
284
285    for text in iter.clone() {
286        cnt += 1;
287        len += text.len();
288    }
289
290    if cnt == 0 {
291        return None;
292    } else if cnt == 1 {
293        let text = iter.next().unwrap();
294
295        return Some(Cow::Borrowed(text));
296    }
297
298    let mut buf = String::with_capacity(len);
299
300    for text in iter {
301        buf.push_str(text);
302    }
303
304    Some(Cow::Owned(buf))
305}
306
307#[derive(Debug)]
308pub(crate) struct NodeData {
309    pub(crate) element: Option<NodeId>,
310    pub(crate) text: Option<NodeId>,
311    pub(crate) parent: Option<NodeId>,
312    pub(crate) prev_sibling: Option<NodeId>,
313    pub(crate) next_subtree: Option<NodeId>,
314    pub(crate) last_child: Option<NodeId>,
315}
316
317const _SIZE_OF_NODE_DATA: () = assert!(size_of::<NodeData>() == 3 * size_of::<usize>());
318
319#[derive(Debug)]
320pub(crate) struct ElementData<'input> {
321    pub(crate) name: NameData<'input>,
322    pub(crate) attributes: Range<u32>,
323}
324
325const _SIZE_OF_ELEMENT_DATA: () =
326    assert!(size_of::<ElementData<'static>>() == (3 + 1) * size_of::<usize>());
327
328#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
329pub struct NodeId(NonZeroU32);
330
331impl NodeId {
332    pub fn new(id: usize) -> Result<Self> {
333        if id >= u32::MAX as usize {
334            return ErrorKind::TooManyNodes.into();
335        }
336
337        Ok(Self(NonZeroU32::new(id as u32 + 1).unwrap()))
338    }
339
340    pub fn get(self) -> usize {
341        self.0.get() as usize - 1
342    }
343
344    pub fn next(self) -> Self {
345        Self(self.0.checked_add(1).unwrap())
346    }
347}
348
349#[derive(Clone)]
350pub struct Children<'doc, 'input> {
351    front: Option<Node<'doc, 'input>>,
352    back: Option<Node<'doc, 'input>>,
353}
354
355impl<'doc, 'input> Iterator for Children<'doc, 'input> {
356    type Item = Node<'doc, 'input>;
357
358    fn next(&mut self) -> Option<Self::Item> {
359        let is_last = self.front == self.back;
360
361        let node = self.front.take();
362
363        if is_last {
364            self.back = None;
365        } else {
366            self.front = node.and_then(Node::next_sibling);
367        }
368
369        node
370    }
371}
372
373impl DoubleEndedIterator for Children<'_, '_> {
374    fn next_back(&mut self) -> Option<Self::Item> {
375        let is_last = self.front == self.back;
376
377        let node = self.back.take();
378
379        if is_last {
380            self.front = None;
381        } else {
382            self.back = node.and_then(Node::prev_sibling);
383        }
384
385        node
386    }
387}
388
389#[derive(Clone)]
390pub struct Descendants<'doc, 'input> {
391    from: usize,
392    nodes: Enumerate<Iter<'doc, NodeData>>,
393    doc: &'doc Document<'input>,
394}
395
396impl<'doc, 'input> Iterator for Descendants<'doc, 'input> {
397    type Item = Node<'doc, 'input>;
398
399    fn next(&mut self) -> Option<Self::Item> {
400        self.nodes.next().map(|(idx, data)| Node {
401            id: NodeId::new(self.from + idx).unwrap(),
402            data,
403            doc: self.doc,
404        })
405    }
406
407    fn nth(&mut self, n: usize) -> Option<Self::Item> {
408        self.nodes.nth(n).map(|(idx, data)| Node {
409            id: NodeId::new(self.from + idx).unwrap(),
410            data,
411            doc: self.doc,
412        })
413    }
414
415    fn size_hint(&self) -> (usize, Option<usize>) {
416        self.nodes.size_hint()
417    }
418}
419
420impl ExactSizeIterator for Descendants<'_, '_> {}
421
422impl DoubleEndedIterator for Descendants<'_, '_> {
423    fn next_back(&mut self) -> Option<Self::Item> {
424        self.nodes.next_back().map(|(idx, data)| Node {
425            id: NodeId::new(self.from + idx).unwrap(),
426            data,
427            doc: self.doc,
428        })
429    }
430}
431
432impl fmt::Debug for Node<'_, '_> {
433    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
434        let mut fmt = fmt.debug_struct("Node");
435
436        if let Some(name) = self.name() {
437            fmt.field("name", &name);
438        }
439
440        if let Some(text) = self.text() {
441            fmt.field("text", &text);
442        }
443
444        if self.has_attributes() {
445            fmt.field("attributes", &self.attributes());
446        }
447
448        if self.has_children() {
449            fmt.field("children", &self.children());
450        }
451
452        fmt.finish()
453    }
454}
455
456impl fmt::Debug for Children<'_, '_> {
457    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
458        fmt.debug_list().entries(self.clone()).finish()
459    }
460}
461
462impl fmt::Debug for Descendants<'_, '_> {
463    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
464        fmt.debug_list().entries(self.clone()).finish()
465    }
466}