Skip to main content

xsd_schema/xpath/
sequence_ops.rs

1//! Sequence operations for XPath evaluation.
2//!
3//! This module implements XPath 2.0 sequence operations:
4//! - Union (|) - node-only, returns nodes in document order
5//! - Intersect - node-only, returns nodes in document order
6//! - Except - node-only, returns nodes in document order
7//!
8//! Note: Union, intersect, and except are NODE-ONLY operations in XPath 2.0.
9//! Applying them to atomic values results in XPTY0004.
10
11use std::collections::HashSet;
12
13use crate::types::value::XmlValue;
14
15use super::error::XPathError;
16use super::item_set::{ItemSet, XPathComparer, XPathEqualityComparer};
17use super::iterator::XmlItem;
18use super::DomNavigator;
19
20// ============================================================================
21// Node-only sequence operations (union, intersect, except)
22// ============================================================================
23
24/// Compute the union of two node sequences.
25///
26/// Returns all nodes from both sequences, deduplicated by node identity,
27/// sorted in document order.
28///
29/// # XPath Semantics
30///
31/// The union operator `|` is defined only for node sequences. The result
32/// contains all nodes that are in either operand, in document order, with
33/// duplicates removed.
34///
35/// # Errors
36///
37/// Returns XPTY0004 if either sequence contains non-node items.
38pub fn union_nodes<N: DomNavigator + Clone>(
39    left: Vec<XmlItem<N>>,
40    right: Vec<XmlItem<N>>,
41) -> Result<Vec<XmlItem<N>>, XPathError> {
42    let mut result: ItemSet<XmlItem<N>> = ItemSet::with_capacity(left.len() + right.len());
43    let eq_comparer = XPathEqualityComparer::new();
44
45    // Add all items from left, checking they are nodes
46    for item in left {
47        if !matches!(item, XmlItem::Node(_)) {
48            return Err(XPathError::type_mismatch(
49                "node()*".to_string(),
50                "atomic value".to_string(),
51            ));
52        }
53        // Check for duplicates using node identity
54        let is_duplicate = result
55            .iter()
56            .any(|existing| eq_comparer.equals(existing, &item));
57        if !is_duplicate {
58            result.add(item);
59        }
60    }
61
62    // Add items from right that aren't already in result
63    for item in right {
64        if !matches!(item, XmlItem::Node(_)) {
65            return Err(XPathError::type_mismatch(
66                "node()*".to_string(),
67                "atomic value".to_string(),
68            ));
69        }
70        let is_duplicate = result
71            .iter()
72            .any(|existing| eq_comparer.equals(existing, &item));
73        if !is_duplicate {
74            result.add(item);
75        }
76    }
77
78    // Sort in document order
79    let comparer = XPathComparer::new();
80    result.sort_with(&comparer);
81
82    Ok(result.into_iter().collect())
83}
84
85/// Compute the intersection of two node sequences.
86///
87/// Returns nodes that appear in both sequences, sorted in document order.
88///
89/// # XPath Semantics
90///
91/// The intersect operator returns all nodes that are in both operands,
92/// in document order.
93///
94/// # Errors
95///
96/// Returns XPTY0004 if either sequence contains non-node items.
97pub fn intersect_nodes<N: DomNavigator + Clone>(
98    left: Vec<XmlItem<N>>,
99    right: Vec<XmlItem<N>>,
100) -> Result<Vec<XmlItem<N>>, XPathError> {
101    // Validate that all items are nodes
102    for item in &left {
103        if !matches!(item, XmlItem::Node(_)) {
104            return Err(XPathError::type_mismatch(
105                "node()*".to_string(),
106                "atomic value".to_string(),
107            ));
108        }
109    }
110    for item in &right {
111        if !matches!(item, XmlItem::Node(_)) {
112            return Err(XPathError::type_mismatch(
113                "node()*".to_string(),
114                "atomic value".to_string(),
115            ));
116        }
117    }
118
119    let eq_comparer = XPathEqualityComparer::new();
120    let mut result: ItemSet<XmlItem<N>> = ItemSet::new();
121
122    // Find nodes in left that also exist in right
123    for left_item in left {
124        let in_right = right.iter().any(|r| eq_comparer.equals(&left_item, r));
125        if in_right {
126            // Check we haven't already added this node
127            let already_added = result
128                .iter()
129                .any(|existing| eq_comparer.equals(existing, &left_item));
130            if !already_added {
131                result.add(left_item);
132            }
133        }
134    }
135
136    // Sort in document order
137    let comparer = XPathComparer::new();
138    result.sort_with(&comparer);
139
140    Ok(result.into_iter().collect())
141}
142
143/// Compute the difference of two node sequences (left except right).
144///
145/// Returns nodes that appear in left but not in right, sorted in document order.
146///
147/// # XPath Semantics
148///
149/// The except operator returns all nodes that are in the first operand
150/// but not in the second operand, in document order.
151///
152/// # Errors
153///
154/// Returns XPTY0004 if either sequence contains non-node items.
155pub fn except_nodes<N: DomNavigator + Clone>(
156    left: Vec<XmlItem<N>>,
157    right: Vec<XmlItem<N>>,
158) -> Result<Vec<XmlItem<N>>, XPathError> {
159    // Validate that all items are nodes
160    for item in &left {
161        if !matches!(item, XmlItem::Node(_)) {
162            return Err(XPathError::type_mismatch(
163                "node()*".to_string(),
164                "atomic value".to_string(),
165            ));
166        }
167    }
168    for item in &right {
169        if !matches!(item, XmlItem::Node(_)) {
170            return Err(XPathError::type_mismatch(
171                "node()*".to_string(),
172                "atomic value".to_string(),
173            ));
174        }
175    }
176
177    let eq_comparer = XPathEqualityComparer::new();
178    let mut result: ItemSet<XmlItem<N>> = ItemSet::new();
179
180    // Find nodes in left that do NOT exist in right
181    for left_item in left {
182        let in_right = right.iter().any(|r| eq_comparer.equals(&left_item, r));
183        if !in_right {
184            // Check we haven't already added this node
185            let already_added = result
186                .iter()
187                .any(|existing| eq_comparer.equals(existing, &left_item));
188            if !already_added {
189                result.add(left_item);
190            }
191        }
192    }
193
194    // Sort in document order
195    let comparer = XPathComparer::new();
196    result.sort_with(&comparer);
197
198    Ok(result.into_iter().collect())
199}
200
201// ============================================================================
202// Atomic value sequence operations (for fn:distinct-values, etc.)
203// ============================================================================
204
205/// Compute the union of two atomic value sequences (deduplicates by value equality).
206///
207/// Note: This is NOT the XPath `|` operator (which is node-only).
208/// This is for internal use with atomic value sequences.
209pub fn union_atomic_values(left: Vec<XmlValue>, right: Vec<XmlValue>) -> Vec<XmlValue> {
210    let mut seen = HashSet::new();
211    let mut result = Vec::with_capacity(left.len() + right.len());
212
213    for item in left.into_iter().chain(right) {
214        let key = item.to_string_value();
215        if !seen.contains(&key) {
216            seen.insert(key);
217            result.push(item);
218        }
219    }
220
221    result
222}
223
224/// Compute the intersection of two atomic value sequences.
225///
226/// Note: This is NOT the XPath `intersect` operator (which is node-only).
227pub fn intersect_atomic_values(left: Vec<XmlValue>, right: Vec<XmlValue>) -> Vec<XmlValue> {
228    let right_set: HashSet<String> = right.iter().map(|v| v.to_string_value()).collect();
229
230    let mut seen = HashSet::new();
231    let mut result = Vec::new();
232
233    for item in left {
234        let key = item.to_string_value();
235        if right_set.contains(&key) && !seen.contains(&key) {
236            seen.insert(key);
237            result.push(item);
238        }
239    }
240
241    result
242}
243
244/// Compute the difference of two atomic value sequences.
245///
246/// Note: This is NOT the XPath `except` operator (which is node-only).
247pub fn except_atomic_values(left: Vec<XmlValue>, right: Vec<XmlValue>) -> Vec<XmlValue> {
248    let right_set: HashSet<String> = right.iter().map(|v| v.to_string_value()).collect();
249
250    let mut seen = HashSet::new();
251    let mut result = Vec::new();
252
253    for item in left {
254        let key = item.to_string_value();
255        if !right_set.contains(&key) && !seen.contains(&key) {
256            seen.insert(key);
257            result.push(item);
258        }
259    }
260
261    result
262}
263
264// ============================================================================
265// General sequence utilities
266// ============================================================================
267
268/// Check if a sequence is empty.
269pub fn is_empty(values: &[XmlValue]) -> bool {
270    values.is_empty()
271}
272
273/// Check if a sequence contains exactly one item.
274pub fn is_singleton(values: &[XmlValue]) -> bool {
275    values.len() == 1
276}
277
278/// Get the first item from a sequence, if any.
279pub fn head(values: &[XmlValue]) -> Option<&XmlValue> {
280    values.first()
281}
282
283/// Get all items except the first from a sequence.
284pub fn tail(values: &[XmlValue]) -> &[XmlValue] {
285    if values.is_empty() {
286        &[]
287    } else {
288        &values[1..]
289    }
290}
291
292/// Reverse a sequence.
293pub fn reverse(values: Vec<XmlValue>) -> Vec<XmlValue> {
294    let mut result = values;
295    result.reverse();
296    result
297}
298
299/// Get the count of items in a sequence.
300pub fn count(values: &[XmlValue]) -> usize {
301    values.len()
302}
303
304/// Check if a sequence contains a specific value (by value equality).
305pub fn contains_value(values: &[XmlValue], target: &XmlValue) -> bool {
306    let target_str = target.to_string_value();
307    values.iter().any(|v| v.to_string_value() == target_str)
308}
309
310/// Insert an item at a specific position.
311pub fn insert_before(values: Vec<XmlValue>, position: usize, item: XmlValue) -> Vec<XmlValue> {
312    let mut result = values;
313    let pos = position.min(result.len());
314    result.insert(pos, item);
315    result
316}
317
318/// Remove an item at a specific position.
319pub fn remove_at(values: Vec<XmlValue>, position: usize) -> Vec<XmlValue> {
320    let mut result = values;
321    if position < result.len() {
322        result.remove(position);
323    }
324    result
325}
326
327/// Subsequence (slice) of a sequence.
328///
329/// XPath uses 1-based indexing.
330pub fn subsequence(values: &[XmlValue], start: usize, length: Option<usize>) -> Vec<XmlValue> {
331    if start == 0 || start > values.len() {
332        return Vec::new();
333    }
334
335    let start_idx = start - 1; // Convert to 0-based
336    let end_idx = match length {
337        Some(len) => (start_idx + len).min(values.len()),
338        None => values.len(),
339    };
340
341    values[start_idx..end_idx].to_vec()
342}
343
344/// Get distinct values from a sequence (removes duplicates by value equality).
345///
346/// This implements fn:distinct-values which works on atomic values.
347pub fn distinct_values(values: Vec<XmlValue>) -> Vec<XmlValue> {
348    let mut seen = HashSet::new();
349    let mut result = Vec::with_capacity(values.len());
350
351    for item in values {
352        let key = item.to_string_value();
353        if !seen.contains(&key) {
354            seen.insert(key);
355            result.push(item);
356        }
357    }
358
359    result
360}
361
362/// Find the index of a value in a sequence (1-based, XPath style).
363///
364/// Returns 0 if not found.
365pub fn index_of(values: &[XmlValue], target: &XmlValue) -> usize {
366    let target_str = target.to_string_value();
367    for (i, v) in values.iter().enumerate() {
368        if v.to_string_value() == target_str {
369            return i + 1; // 1-based
370        }
371    }
372    0
373}
374
375/// Deep equality check between two atomic value sequences.
376pub fn deep_equal(left: &[XmlValue], right: &[XmlValue]) -> bool {
377    if left.len() != right.len() {
378        return false;
379    }
380
381    for (l, r) in left.iter().zip(right.iter()) {
382        if l.to_string_value() != r.to_string_value() {
383            return false;
384        }
385    }
386
387    true
388}
389
390// ============================================================================
391// Backwards compatibility aliases (deprecated)
392// ============================================================================
393
394/// Deprecated: Use `union_atomic_values` for atomic sequences or `union_nodes` for nodes.
395#[deprecated(note = "Use union_atomic_values for atomic sequences or union_nodes for nodes")]
396pub fn union_values(left: Vec<XmlValue>, right: Vec<XmlValue>) -> Vec<XmlValue> {
397    union_atomic_values(left, right)
398}
399
400/// Deprecated: Use `intersect_atomic_values` for atomic sequences or `intersect_nodes` for nodes.
401#[deprecated(
402    note = "Use intersect_atomic_values for atomic sequences or intersect_nodes for nodes"
403)]
404pub fn intersect_values(left: Vec<XmlValue>, right: Vec<XmlValue>) -> Vec<XmlValue> {
405    intersect_atomic_values(left, right)
406}
407
408/// Deprecated: Use `except_atomic_values` for atomic sequences or `except_nodes` for nodes.
409#[deprecated(note = "Use except_atomic_values for atomic sequences or except_nodes for nodes")]
410pub fn except_values(left: Vec<XmlValue>, right: Vec<XmlValue>) -> Vec<XmlValue> {
411    except_atomic_values(left, right)
412}
413
414#[cfg(test)]
415mod tests {
416    use super::*;
417    use num_bigint::BigInt;
418
419    fn int_values(nums: &[i32]) -> Vec<XmlValue> {
420        nums.iter()
421            .map(|&n| XmlValue::integer(BigInt::from(n)))
422            .collect()
423    }
424
425    fn str_values(strs: &[&str]) -> Vec<XmlValue> {
426        strs.iter().map(|&s| XmlValue::string(s)).collect()
427    }
428
429    #[test]
430    fn test_union_atomic() {
431        let left = int_values(&[1, 2, 3]);
432        let right = int_values(&[2, 3, 4]);
433        let result = union_atomic_values(left, right);
434        assert_eq!(count(&result), 4);
435    }
436
437    #[test]
438    fn test_intersect_atomic() {
439        let left = int_values(&[1, 2, 3]);
440        let right = int_values(&[2, 3, 4]);
441        let result = intersect_atomic_values(left, right);
442        assert_eq!(count(&result), 2);
443    }
444
445    #[test]
446    fn test_except_atomic() {
447        let left = int_values(&[1, 2, 3]);
448        let right = int_values(&[2, 3, 4]);
449        let result = except_atomic_values(left, right);
450        assert_eq!(count(&result), 1);
451    }
452
453    #[test]
454    fn test_is_empty() {
455        assert!(is_empty(&[]));
456        assert!(!is_empty(&int_values(&[1])));
457    }
458
459    #[test]
460    fn test_is_singleton() {
461        assert!(!is_singleton(&[]));
462        assert!(is_singleton(&int_values(&[1])));
463        assert!(!is_singleton(&int_values(&[1, 2])));
464    }
465
466    #[test]
467    fn test_head_tail() {
468        let values = int_values(&[1, 2, 3]);
469        assert_eq!(head(&values).unwrap().to_string_value(), "1");
470        assert_eq!(tail(&values).len(), 2);
471
472        let empty: Vec<XmlValue> = Vec::new();
473        assert!(head(&empty).is_none());
474        assert!(tail(&empty).is_empty());
475    }
476
477    #[test]
478    fn test_reverse() {
479        let values = int_values(&[1, 2, 3]);
480        let reversed = reverse(values);
481        assert_eq!(reversed[0].to_string_value(), "3");
482        assert_eq!(reversed[2].to_string_value(), "1");
483    }
484
485    #[test]
486    fn test_subsequence() {
487        let values = int_values(&[1, 2, 3, 4, 5]);
488
489        // Start at 2, length 3 -> [2, 3, 4]
490        let sub = subsequence(&values, 2, Some(3));
491        assert_eq!(count(&sub), 3);
492
493        // Start at 3, no length -> [3, 4, 5]
494        let sub = subsequence(&values, 3, None);
495        assert_eq!(count(&sub), 3);
496
497        // Out of bounds
498        let sub = subsequence(&values, 10, None);
499        assert!(is_empty(&sub));
500    }
501
502    #[test]
503    fn test_distinct_values() {
504        let values = str_values(&["a", "b", "a", "c", "b"]);
505        let distinct = distinct_values(values);
506        assert_eq!(count(&distinct), 3);
507    }
508
509    #[test]
510    fn test_index_of() {
511        let values = str_values(&["a", "b", "c"]);
512        assert_eq!(index_of(&values, &XmlValue::string("b")), 2);
513        assert_eq!(index_of(&values, &XmlValue::string("z")), 0);
514    }
515
516    #[test]
517    fn test_deep_equal() {
518        let a = int_values(&[1, 2, 3]);
519        let b = int_values(&[1, 2, 3]);
520        let c = int_values(&[1, 2, 4]);
521        let d = int_values(&[1, 2]);
522
523        assert!(deep_equal(&a, &b));
524        assert!(!deep_equal(&a, &c));
525        assert!(!deep_equal(&a, &d));
526    }
527
528    #[test]
529    fn test_insert_before() {
530        let values = int_values(&[1, 3]);
531        let result = insert_before(values, 1, XmlValue::integer(BigInt::from(2)));
532        assert_eq!(count(&result), 3);
533        assert_eq!(result[1].to_string_value(), "2");
534    }
535
536    #[test]
537    fn test_remove_at() {
538        let values = int_values(&[1, 2, 3]);
539        let result = remove_at(values, 1);
540        assert_eq!(count(&result), 2);
541    }
542
543    // Tests for node-only operations would require a DomNavigator implementation
544    // which is available in integration tests with roxmltree
545}