Skip to main content

xsd_schema/xpath/
iterator.rs

1//! XPath2 item and node iterator abstractions.
2//!
3//! Mirrors the design in `XML_NODE_ITERATOR_DESIGN.md`.
4
5use std::cell::RefCell;
6use std::marker::PhantomData;
7use std::rc::Rc;
8
9use num_bigint::BigInt;
10
11use crate::types::XmlValue;
12
13use super::error::XPathError;
14use super::item_set::{ItemSet, XPathComparer};
15use super::DomNavigator;
16
17/// XPath item (node or atomic value).
18#[derive(Debug, Clone)]
19pub enum XmlItem<N: DomNavigator> {
20    Node(N),
21    Atomic(XmlValue),
22}
23
24impl<N: DomNavigator> XmlItem<N> {
25    /// Check if this item is a node.
26    pub fn is_node(&self) -> bool {
27        matches!(self, XmlItem::Node(_))
28    }
29
30    /// Check if this item is an atomic value.
31    pub fn is_atomic(&self) -> bool {
32        matches!(self, XmlItem::Atomic(_))
33    }
34
35    /// Try to get a reference to the node.
36    pub fn as_node(&self) -> Option<&N> {
37        match self {
38            XmlItem::Node(n) => Some(n),
39            _ => None,
40        }
41    }
42
43    /// Try to get a reference to the atomic value.
44    pub fn as_atomic(&self) -> Option<&XmlValue> {
45        match self {
46            XmlItem::Atomic(v) => Some(v),
47            _ => None,
48        }
49    }
50
51    /// Try to extract a string from an atomic item.
52    pub fn as_str(&self) -> Option<String> {
53        self.as_atomic()
54            .and_then(|v| v.as_string().map(|s| s.to_string()))
55    }
56
57    /// Try to extract a boolean from an atomic item.
58    pub fn as_bool(&self) -> Option<bool> {
59        self.as_atomic().and_then(|v| v.as_boolean())
60    }
61
62    /// Try to extract a double from an atomic item.
63    pub fn as_f64(&self) -> Option<f64> {
64        self.as_atomic().and_then(|v| v.as_double())
65    }
66
67    /// Try to extract an integer from an atomic item.
68    pub fn as_integer(&self) -> Option<BigInt> {
69        self.as_atomic().and_then(|v| v.as_integer().cloned())
70    }
71}
72
73// ============================================================================
74// From Trait Implementations for XmlItem
75// ============================================================================
76
77impl<N: DomNavigator> From<N> for XmlItem<N> {
78    fn from(node: N) -> Self {
79        XmlItem::Node(node)
80    }
81}
82
83impl<N: DomNavigator> From<XmlValue> for XmlItem<N> {
84    fn from(value: XmlValue) -> Self {
85        XmlItem::Atomic(value)
86    }
87}
88
89/// Borrowed view of an XPath item.
90#[derive(Debug, Clone, Copy)]
91pub enum XmlItemRef<'a, N: DomNavigator> {
92    Node(&'a N),
93    Atomic(&'a XmlValue),
94}
95
96impl<'a, N: DomNavigator> XmlItemRef<'a, N> {
97    pub fn is_node(&self) -> bool {
98        matches!(self, XmlItemRef::Node(_))
99    }
100
101    pub fn from_item(item: &'a XmlItem<N>) -> Self {
102        match item {
103            XmlItem::Node(node) => XmlItemRef::Node(node),
104            XmlItem::Atomic(value) => XmlItemRef::Atomic(value),
105        }
106    }
107}
108
109/// Iterator over XPath items (nodes + atomic values).
110///
111/// This mirrors the .NET XPathNodeIterator shape: a cloneable cursor with
112/// `current` and `move_next` semantics.
113pub trait XmlNodeIterator: Clone {
114    type Navigator: DomNavigator;
115
116    /// Current item (None before first move_next or after end).
117    fn current(&self) -> Option<XmlItemRef<'_, Self::Navigator>>;
118
119    /// 0-based position of the current item, or None if not started/finished.
120    fn current_position(&self) -> Option<usize>;
121
122    /// Advance to next item; returns false at end of sequence.
123    fn move_next(&mut self) -> Result<bool, XPathError>;
124
125    /// 1-based sequential position for axis iteration.
126    fn sequential_position(&self) -> Option<usize> {
127        self.current_position().map(|pos| pos + 1)
128    }
129
130    /// Reset sequential position tracking (used by position filters).
131    fn reset_sequential_position(&mut self) {}
132}
133
134fn clone_item_ref<N: DomNavigator>(item: XmlItemRef<'_, N>) -> XmlItem<N> {
135    match item {
136        XmlItemRef::Node(node) => XmlItem::Node(node.clone()),
137        XmlItemRef::Atomic(value) => XmlItem::Atomic(value.clone()),
138    }
139}
140
141/// Vector-backed iterator for simple tests and adapters.
142#[derive(Debug, Clone)]
143pub struct VecNodeIterator<N: DomNavigator> {
144    items: Vec<XmlItem<N>>,
145    index: Option<usize>,
146}
147
148impl<N: DomNavigator> VecNodeIterator<N> {
149    pub fn new(items: Vec<XmlItem<N>>) -> Self {
150        Self { items, index: None }
151    }
152}
153
154impl<N: DomNavigator> XmlNodeIterator for VecNodeIterator<N> {
155    type Navigator = N;
156
157    fn current(&self) -> Option<XmlItemRef<'_, Self::Navigator>> {
158        self.index.and_then(|i| match self.items.get(i) {
159            Some(XmlItem::Node(node)) => Some(XmlItemRef::Node(node)),
160            Some(XmlItem::Atomic(value)) => Some(XmlItemRef::Atomic(value)),
161            None => None,
162        })
163    }
164
165    fn current_position(&self) -> Option<usize> {
166        self.index
167    }
168
169    fn move_next(&mut self) -> Result<bool, XPathError> {
170        let next = match self.index {
171            None => 0,
172            Some(i) => i + 1,
173        };
174
175        if next < self.items.len() {
176            self.index = Some(next);
177            Ok(true)
178        } else {
179            self.index = None;
180            Ok(false)
181        }
182    }
183}
184
185/// Iterator that yields no items.
186#[derive(Debug, Clone, Copy, Default)]
187pub struct EmptyIterator<N: DomNavigator> {
188    _marker: PhantomData<N>,
189}
190
191impl<N: DomNavigator> EmptyIterator<N> {
192    pub fn new() -> Self {
193        Self {
194            _marker: PhantomData,
195        }
196    }
197}
198
199impl<N: DomNavigator> XmlNodeIterator for EmptyIterator<N> {
200    type Navigator = N;
201
202    fn current(&self) -> Option<XmlItemRef<'_, Self::Navigator>> {
203        None
204    }
205
206    fn current_position(&self) -> Option<usize> {
207        None
208    }
209
210    fn move_next(&mut self) -> Result<bool, XPathError> {
211        Ok(false)
212    }
213}
214
215struct BufferedState<I: XmlNodeIterator> {
216    source: I,
217    buffer: Vec<XmlItem<I::Navigator>>,
218    exhausted: bool,
219}
220
221/// Buffered iterator that can be replayed without re-reading the source.
222#[derive(Clone)]
223pub struct BufferedNodeIterator<I: XmlNodeIterator> {
224    state: Rc<RefCell<BufferedState<I>>>,
225    index: Option<usize>,
226    current: Option<XmlItem<I::Navigator>>,
227}
228
229impl<I: XmlNodeIterator> BufferedNodeIterator<I> {
230    pub fn new(source: I) -> Self {
231        Self {
232            state: Rc::new(RefCell::new(BufferedState {
233                source,
234                buffer: Vec::new(),
235                exhausted: false,
236            })),
237            index: None,
238            current: None,
239        }
240    }
241
242    pub fn from_ref(source: &I) -> Self {
243        Self::new(source.clone())
244    }
245
246    pub fn preload(source: I) -> Result<Self, XPathError> {
247        let mut iter = Self::new(source);
248        iter.fill()?;
249        Ok(iter)
250    }
251
252    pub fn fill(&mut self) -> Result<(), XPathError> {
253        let mut state = self.state.borrow_mut();
254        if state.exhausted {
255            return Ok(());
256        }
257        while state.source.move_next()? {
258            let next_item = state.source.current().map(clone_item_ref);
259            if let Some(item) = next_item {
260                state.buffer.push(item);
261            } else {
262                state.exhausted = true;
263                return Ok(());
264            }
265        }
266        state.exhausted = true;
267        Ok(())
268    }
269}
270
271impl<I: XmlNodeIterator> XmlNodeIterator for BufferedNodeIterator<I> {
272    type Navigator = I::Navigator;
273
274    fn current(&self) -> Option<XmlItemRef<'_, Self::Navigator>> {
275        self.current.as_ref().map(XmlItemRef::from_item)
276    }
277
278    fn current_position(&self) -> Option<usize> {
279        self.index
280    }
281
282    fn move_next(&mut self) -> Result<bool, XPathError> {
283        let next_index = match self.index {
284            None => 0,
285            Some(i) => i + 1,
286        };
287
288        let mut state = self.state.borrow_mut();
289        if next_index < state.buffer.len() {
290            self.index = Some(next_index);
291            self.current = state.buffer.get(next_index).cloned();
292            return Ok(true);
293        }
294
295        if state.exhausted {
296            self.index = None;
297            self.current = None;
298            return Ok(false);
299        }
300
301        if state.source.move_next()? {
302            let next_item = state.source.current().map(clone_item_ref);
303            if let Some(item) = next_item {
304                state.buffer.push(item.clone());
305                self.index = Some(next_index);
306                self.current = Some(item);
307                return Ok(true);
308            }
309            state.exhausted = true;
310            self.index = None;
311            self.current = None;
312            return Ok(false);
313        }
314
315        state.exhausted = true;
316        self.index = None;
317        self.current = None;
318        Ok(false)
319    }
320}
321
322/// Iterator over an inclusive integer range (XPath `to` expression).
323#[derive(Debug, Clone)]
324pub struct RangeIterator<N: DomNavigator> {
325    min: BigInt,
326    max: BigInt,
327    current_value: Option<BigInt>,
328    current_item: Option<XmlItem<N>>,
329    index: Option<usize>,
330    done: bool,
331    _marker: PhantomData<N>,
332}
333
334impl<N: DomNavigator> RangeIterator<N> {
335    pub fn new(min: BigInt, max: BigInt) -> Self {
336        let done = min > max;
337        Self {
338            min,
339            max,
340            current_value: None,
341            current_item: None,
342            index: None,
343            done,
344            _marker: PhantomData,
345        }
346    }
347
348    pub fn from_i64(min: i64, max: i64) -> Self {
349        Self::new(BigInt::from(min), BigInt::from(max))
350    }
351}
352
353impl<N: DomNavigator> XmlNodeIterator for RangeIterator<N> {
354    type Navigator = N;
355
356    fn current(&self) -> Option<XmlItemRef<'_, Self::Navigator>> {
357        self.current_item.as_ref().map(XmlItemRef::from_item)
358    }
359
360    fn current_position(&self) -> Option<usize> {
361        self.index
362    }
363
364    fn move_next(&mut self) -> Result<bool, XPathError> {
365        if self.done {
366            self.current_value = None;
367            self.current_item = None;
368            self.index = None;
369            return Ok(false);
370        }
371
372        let next_value = match &self.current_value {
373            None => self.min.clone(),
374            Some(value) => value + 1,
375        };
376
377        if next_value > self.max {
378            self.done = true;
379            self.current_value = None;
380            self.current_item = None;
381            self.index = None;
382            return Ok(false);
383        }
384
385        self.current_value = Some(next_value.clone());
386        self.current_item = Some(XmlItem::Atomic(XmlValue::integer(next_value)));
387        self.index = Some(match self.index {
388            None => 0,
389            Some(i) => i + 1,
390        });
391        Ok(true)
392    }
393}
394
395/// Iterator that enforces document order for node sequences.
396#[derive(Debug, Clone)]
397pub struct DocumentOrderNodeIterator<N: DomNavigator> {
398    items: ItemSet<XmlItem<N>>,
399    item_index: usize,
400    index: Option<usize>,
401    current: Option<XmlItem<N>>,
402    last_node: Option<N>,
403}
404
405impl<N: DomNavigator> DocumentOrderNodeIterator<N> {
406    pub fn new<I: XmlNodeIterator<Navigator = N>>(mut base: I) -> Result<Self, XPathError> {
407        let mut is_node: Option<bool> = None;
408        let mut items = ItemSet::new();
409
410        while base.move_next()? {
411            let item = match base.current() {
412                Some(item) => item,
413                None => break,
414            };
415            let item_is_node = matches!(item, XmlItemRef::Node(_));
416            if let Some(prev) = is_node {
417                if prev != item_is_node {
418                    return Err(XPathError::XPTY0018);
419                }
420            } else {
421                is_node = Some(item_is_node);
422            }
423            items.add(clone_item_ref(item));
424        }
425
426        if is_node == Some(true) {
427            let comparer = XPathComparer::new();
428            items.sort_with(&comparer);
429        }
430
431        Ok(Self {
432            items,
433            item_index: 0,
434            index: None,
435            current: None,
436            last_node: None,
437        })
438    }
439}
440
441impl<N: DomNavigator> XmlNodeIterator for DocumentOrderNodeIterator<N> {
442    type Navigator = N;
443
444    fn current(&self) -> Option<XmlItemRef<'_, Self::Navigator>> {
445        self.current.as_ref().map(XmlItemRef::from_item)
446    }
447
448    fn current_position(&self) -> Option<usize> {
449        self.index
450    }
451
452    fn move_next(&mut self) -> Result<bool, XPathError> {
453        while self.item_index < self.items.len() {
454            let item = self.items[self.item_index].clone();
455            self.item_index += 1;
456
457            if let XmlItem::Node(nav) = &item {
458                if let Some(last) = self.last_node.as_ref() {
459                    if last.is_same_position(nav) {
460                        continue;
461                    }
462                }
463                self.last_node = Some(nav.clone());
464            }
465
466            self.current = Some(item);
467            let next_index = match self.index {
468                None => 0,
469                Some(i) => i + 1,
470            };
471            self.index = Some(next_index);
472            return Ok(true);
473        }
474
475        self.index = None;
476        self.current = None;
477        Ok(false)
478    }
479}
480
481/// Iterator that returns the item at a specific sequential position.
482#[derive(Debug, Clone)]
483pub struct PositionFilterNodeIterator<I: XmlNodeIterator> {
484    position: usize,
485    iter: I,
486    index: Option<usize>,
487    current: Option<XmlItem<I::Navigator>>,
488    done: bool,
489}
490
491impl<I: XmlNodeIterator> PositionFilterNodeIterator<I> {
492    pub fn new(position: usize, iter: I) -> Self {
493        Self {
494            position,
495            iter,
496            index: None,
497            current: None,
498            done: false,
499        }
500    }
501}
502
503impl<I: XmlNodeIterator> XmlNodeIterator for PositionFilterNodeIterator<I> {
504    type Navigator = I::Navigator;
505
506    fn current(&self) -> Option<XmlItemRef<'_, Self::Navigator>> {
507        self.current.as_ref().map(XmlItemRef::from_item)
508    }
509
510    fn current_position(&self) -> Option<usize> {
511        self.index
512    }
513
514    fn move_next(&mut self) -> Result<bool, XPathError> {
515        if self.done {
516            self.index = None;
517            self.current = None;
518            return Ok(false);
519        }
520
521        while self.iter.move_next()? {
522            let seq_pos = match self.iter.sequential_position() {
523                Some(pos) => pos,
524                None => continue,
525            };
526            if seq_pos == self.position {
527                self.iter.reset_sequential_position();
528                self.current = self.iter.current().map(clone_item_ref);
529                self.index = Some(0);
530                self.done = true;
531                return Ok(self.current.is_some());
532            }
533        }
534
535        self.done = true;
536        self.index = None;
537        self.current = None;
538        Ok(false)
539    }
540}
541
542/// Iterator that returns atomic items and errors on nodes.
543#[derive(Debug, Clone)]
544pub struct ItemIterator<I: XmlNodeIterator> {
545    iter: I,
546    started: bool,
547    index: Option<usize>,
548    current: Option<XmlItem<I::Navigator>>,
549}
550
551impl<I: XmlNodeIterator> ItemIterator<I> {
552    pub fn new(iter: I) -> Self {
553        Self {
554            iter,
555            started: false,
556            index: None,
557            current: None,
558        }
559    }
560}
561
562impl<I: XmlNodeIterator> XmlNodeIterator for ItemIterator<I> {
563    type Navigator = I::Navigator;
564
565    fn current(&self) -> Option<XmlItemRef<'_, Self::Navigator>> {
566        self.current.as_ref().map(XmlItemRef::from_item)
567    }
568
569    fn current_position(&self) -> Option<usize> {
570        self.index
571    }
572
573    fn move_next(&mut self) -> Result<bool, XPathError> {
574        if !self.started {
575            self.started = true;
576            if self.iter.current_position().is_some() {
577                let item = match self.iter.current() {
578                    Some(item) => item,
579                    None => {
580                        self.index = None;
581                        self.current = None;
582                        return Ok(false);
583                    }
584                };
585                if matches!(item, XmlItemRef::Node(_)) {
586                    return Err(XPathError::XPTY0018);
587                }
588                self.current = Some(clone_item_ref(item));
589                self.index = Some(0);
590                return Ok(true);
591            }
592        }
593
594        if self.iter.move_next()? {
595            let item = match self.iter.current() {
596                Some(item) => item,
597                None => {
598                    self.index = None;
599                    self.current = None;
600                    return Ok(false);
601                }
602            };
603            if matches!(item, XmlItemRef::Node(_)) {
604                return Err(XPathError::XPTY0018);
605            }
606            self.current = Some(clone_item_ref(item));
607            let next_index = match self.index {
608                None => 0,
609                Some(i) => i + 1,
610            };
611            self.index = Some(next_index);
612            return Ok(true);
613        }
614
615        self.index = None;
616        self.current = None;
617        Ok(false)
618    }
619}
620
#[cfg(test)]
mod tests {
    use super::*;

    use crate::navigator::RoXmlNavigator;
    use crate::types::XmlValue;

    /// Navigator type for tests that never touch a real document.
    type StaticNav = RoXmlNavigator<'static>;

    /// Builds an atomic integer item.
    fn int_item<N: DomNavigator>(value: i64) -> XmlItem<N> {
        XmlItem::Atomic(XmlValue::integer(BigInt::from(value)))
    }

    /// Vector-backed source holding the integers 1 and 2.
    fn one_two_source() -> VecNodeIterator<StaticNav> {
        VecNodeIterator::new(vec![int_item(1), int_item(2)])
    }

    /// Reads the current item as an integer, panicking on anything else.
    fn current_integer<I: XmlNodeIterator>(iter: &I) -> BigInt {
        if let Some(XmlItemRef::Atomic(value)) = iter.current() {
            value.as_integer().expect("integer value").clone()
        } else {
            panic!("expected integer value")
        }
    }

    #[test]
    fn test_empty_iterator() {
        let mut iter: EmptyIterator<StaticNav> = EmptyIterator::new();
        assert!(!iter.move_next().unwrap());
        assert!(iter.current().is_none());
        assert!(iter.current_position().is_none());
    }

    #[test]
    fn test_range_iterator_values() {
        let mut iter: RangeIterator<StaticNav> = RangeIterator::from_i64(1, 3);

        for (position, expected) in (1i64..=3).enumerate() {
            assert!(iter.move_next().unwrap());
            assert_eq!(current_integer(&iter), BigInt::from(expected));
            assert_eq!(iter.current_position(), Some(position));
            assert_eq!(iter.sequential_position(), Some(position + 1));
        }

        assert!(!iter.move_next().unwrap());
        assert!(iter.current().is_none());
    }

    #[test]
    fn test_range_iterator_empty() {
        let mut iter: RangeIterator<StaticNav> = RangeIterator::from_i64(5, 3);
        assert!(!iter.move_next().unwrap());
        assert!(iter.current().is_none());
    }

    #[test]
    fn test_buffered_iterator_replays() {
        let mut buffered = BufferedNodeIterator::new(one_two_source());
        assert!(buffered.move_next().unwrap());
        assert_eq!(current_integer(&buffered), BigInt::from(1));

        // A clone starts from the same cursor position as the original.
        let mut clone = buffered.clone();
        assert_eq!(current_integer(&clone), BigInt::from(1));

        assert!(buffered.move_next().unwrap());
        assert_eq!(current_integer(&buffered), BigInt::from(2));

        // The clone replays the second item from the shared buffer.
        assert!(clone.move_next().unwrap());
        assert_eq!(current_integer(&clone), BigInt::from(2));
    }

    #[test]
    fn test_document_order_iterator_dedupes() {
        let doc = roxmltree::Document::parse("<root><a/><a/></root>").expect("parse xml");
        let mut nav = RoXmlNavigator::new(&doc);
        nav.move_to_first_child(); // root
        nav.move_to_first_child(); // first a
        let first = nav.clone();
        nav.move_to_next_sibling(); // second a
        let second = nav.clone();

        // Out of order, with the first node duplicated.
        let input = vec![
            XmlItem::Node(second),
            XmlItem::Node(first.clone()),
            XmlItem::Node(first),
        ];
        let source: VecNodeIterator<RoXmlNavigator<'_>> = VecNodeIterator::new(input);

        let mut iter = DocumentOrderNodeIterator::new(source).unwrap();
        let mut names: Vec<String> = Vec::new();
        while iter.move_next().unwrap() {
            if let Some(XmlItemRef::Node(node)) = iter.current() {
                names.push(node.local_name().to_string());
            } else {
                panic!("expected node");
            }
        }
        assert_eq!(names, ["a", "a"]);
    }

    #[test]
    fn test_document_order_iterator_rejects_mixed_sequence() {
        let doc = roxmltree::Document::parse("<root><a/></root>").expect("parse xml");
        let mut nav = RoXmlNavigator::new(&doc);
        nav.move_to_first_child();
        nav.move_to_first_child();

        let input = vec![XmlItem::Node(nav.clone()), int_item(1)];
        let source: VecNodeIterator<RoXmlNavigator<'_>> = VecNodeIterator::new(input);

        let outcome = DocumentOrderNodeIterator::new(source);
        assert!(matches!(outcome, Err(XPathError::XPTY0018)));
    }

    #[test]
    fn test_position_filter_iterator() {
        let mut iter = PositionFilterNodeIterator::new(2, one_two_source());
        assert!(iter.move_next().unwrap());
        assert_eq!(current_integer(&iter), BigInt::from(2));
        assert!(!iter.move_next().unwrap());
    }

    #[test]
    fn test_item_iterator_returns_atomic() {
        let mut iter = ItemIterator::new(one_two_source());
        for expected in 1i64..=2 {
            assert!(iter.move_next().unwrap());
            assert_eq!(current_integer(&iter), BigInt::from(expected));
        }
        assert!(!iter.move_next().unwrap());
    }

    #[test]
    fn test_item_iterator_rejects_nodes() {
        let doc = roxmltree::Document::parse("<root><a/></root>").expect("parse xml");
        let mut nav = RoXmlNavigator::new(&doc);
        nav.move_to_first_child();
        nav.move_to_first_child();

        let source: VecNodeIterator<RoXmlNavigator<'_>> =
            VecNodeIterator::new(vec![XmlItem::Node(nav.clone())]);
        let mut iter = ItemIterator::new(source);
        let outcome = iter.move_next();
        assert!(matches!(outcome, Err(XPathError::XPTY0018)));
    }
}