Skip to main content

xsd_schema/xpath/
atomize.rs

1//! Atomization operations for XPath evaluation.
2//!
3//! This module implements XPath 2.0 atomization rules for converting
4//! values to their atomic representations.
5//!
6//! ## Atomization Rules
7//!
8//! Atomization extracts atomic values from items:
9//!
10//! - For atomic values, returns the value itself
11//! - For nodes, returns the typed value of the node
12//! - For empty sequences, returns None
13//! - For sequences with more than one item, raises XPDY0050
14
15use super::error::XPathError;
16use super::functions::XPathValue;
17use super::iterator::XmlItem;
18use super::{DomNavigator, DomNodeType};
19use crate::navigator::TypedValue;
20use crate::types::value::{XmlAtomicValue, XmlValue, XmlValueKind};
21use crate::types::XmlTypeCode;
22
23/// Atomize a navigator node to its XDM atomic value.
24///
25/// Interprets [`TypedValue`] with proper error handling:
26/// - `Value(v)` → `Ok(Some(v))`
27/// - `Untyped` → `Ok(Some(untypedAtomic(string-value)))` (or `xs:string` for comment/PI)
28/// - `Nilled` → `Ok(None)` (empty sequence)
29/// - `Absent` → `Err(FOTY0012)`
30pub fn atomize_node<N: DomNavigator>(nav: &N) -> Result<Option<XmlValue>, XPathError> {
31    match nav.typed_value() {
32        TypedValue::Value(v) => Ok(Some(v)),
33        TypedValue::Untyped => {
34            let v = match nav.node_type() {
35                DomNodeType::Comment | DomNodeType::ProcessingInstruction => {
36                    XmlValue::string(nav.value())
37                }
38                _ => XmlValue::untyped(nav.value()),
39            };
40            Ok(Some(v))
41        }
42        TypedValue::Nilled => Ok(None),
43        TypedValue::Absent => Err(XPathError::no_typed_value()),
44    }
45}
46
47/// Atomize an XmlValue, returning its atomic representation.
48///
49/// For atomic values, this returns a clone of the value.
50/// For union values, this unwraps and atomizes the inner value.
51/// For list values, this returns an error (multiple items).
52///
53/// # Arguments
54///
55/// * `value` - The value to atomize
56///
57/// # Returns
58///
59/// * `Ok(XmlValue)` - The atomized value
60/// * `Err(XPathError)` - If atomization fails
61pub fn atomize(value: &XmlValue) -> Result<XmlValue, XPathError> {
62    match &value.value {
63        // Atomic values return themselves
64        XmlValueKind::Atomic(_) | XmlValueKind::UntypedAtomic(_) => Ok(value.clone()),
65
66        // Union: unwrap and atomize
67        XmlValueKind::Union(inner) => atomize(inner),
68
69        // List values represent multiple items - error
70        XmlValueKind::List { items, .. } if items.len() > 1 => {
71            Err(XPathError::more_than_one_item())
72        }
73
74        // Single-item list: return the item
75        XmlValueKind::List { items, item_type } if items.len() == 1 => Ok(XmlValue::new(
76            *item_type,
77            XmlValueKind::Atomic(items[0].clone()),
78        )),
79
80        // Empty list: conceptually empty sequence
81        XmlValueKind::List { .. } => Err(XPathError::type_mismatch("item()", "empty-sequence()")),
82    }
83}
84
85/// Atomize an optional value.
86///
87/// Returns None for None (empty sequence), otherwise atomizes the value.
88///
89/// # Arguments
90///
91/// * `value` - Optional value to atomize
92///
93/// # Returns
94///
95/// * `Ok(None)` - If input is None (empty sequence)
96/// * `Ok(Some(XmlValue))` - The atomized value
97/// * `Err(XPathError)` - If atomization fails
98pub fn atomize_opt(value: Option<&XmlValue>) -> Result<Option<XmlValue>, XPathError> {
99    match value {
100        None => Ok(None),
101        Some(v) => atomize(v).map(Some),
102    }
103}
104
105/// Atomize a value, requiring a non-empty result.
106///
107/// This is equivalent to `Atomize<T>` in C# - it requires the result to exist.
108///
109/// # Arguments
110///
111/// * `value` - Optional value to atomize
112///
113/// # Returns
114///
115/// * `Ok(XmlValue)` - The atomized value
116/// * `Err(XPathError)` - XPTY0004 if empty, or other atomization errors
117pub fn atomize_required(value: Option<&XmlValue>) -> Result<XmlValue, XPathError> {
118    match value {
119        None => Err(XPathError::type_mismatch("item()", "empty-sequence()")),
120        Some(v) => atomize(v),
121    }
122}
123
124/// Get the string value of an XmlValue.
125///
126/// For atomic values, this returns the canonical string representation.
127/// For union values, this unwraps and gets the string value.
128/// For list values, this joins the item strings with spaces.
129///
130/// # Arguments
131///
132/// * `value` - The value to convert to string
133///
134/// # Returns
135///
136/// The string representation of the value
137pub fn string_value(value: &XmlValue) -> String {
138    value.to_string_value()
139}
140
141/// Get the string value of an optional value.
142///
143/// Returns empty string for None (empty sequence).
144///
145/// # Arguments
146///
147/// * `value` - Optional value to convert
148///
149/// # Returns
150///
151/// The string representation, or empty string for None
152pub fn string_value_opt(value: Option<&XmlValue>) -> String {
153    match value {
154        None => String::new(),
155        Some(v) => string_value(v),
156    }
157}
158
159/// Convert a value to a double (numeric).
160///
161/// Implements the fn:number() behavior:
162/// - Returns NaN for invalid conversions
163/// - Handles UntypedAtomic by parsing as double
164/// - Handles numeric types by conversion
165///
166/// # Arguments
167///
168/// * `value` - The value to convert
169///
170/// # Returns
171///
172/// The numeric value as f64, or NaN if conversion fails
173pub fn to_number(value: &XmlValue) -> f64 {
174    match &value.value {
175        XmlValueKind::Atomic(atom) => atomic_to_number(atom),
176        XmlValueKind::UntypedAtomic(s) => s.trim().parse().unwrap_or(f64::NAN),
177        XmlValueKind::Union(inner) => to_number(inner),
178        XmlValueKind::List { .. } => f64::NAN,
179    }
180}
181
182/// Convert an atomic value to a double.
183fn atomic_to_number(atom: &XmlAtomicValue) -> f64 {
184    match atom {
185        XmlAtomicValue::Double(d) => *d,
186        XmlAtomicValue::Float(f) => *f as f64,
187        XmlAtomicValue::Decimal(d) => d.to_string().parse().unwrap_or(f64::NAN),
188        XmlAtomicValue::Integer(i) => i.to_string().parse().unwrap_or(f64::NAN),
189        XmlAtomicValue::Boolean(b) => {
190            if *b {
191                1.0
192            } else {
193                0.0
194            }
195        }
196        XmlAtomicValue::String(s) => s.trim().parse().unwrap_or(f64::NAN),
197        _ => f64::NAN,
198    }
199}
200
201/// Convert an optional value to a double.
202///
203/// Returns NaN for None (empty sequence).
204pub fn to_number_opt(value: Option<&XmlValue>) -> f64 {
205    match value {
206        None => f64::NAN,
207        Some(v) => to_number(v),
208    }
209}
210
211/// Check if a value is empty (represents an empty sequence).
212///
213/// Note: XmlValue itself doesn't have an "empty" variant.
214/// This checks for empty lists or None optionals.
215pub fn is_empty_list(value: &XmlValue) -> bool {
216    matches!(&value.value, XmlValueKind::List { items, .. } if items.is_empty())
217}
218
219/// Get the type code of the underlying atomic value.
220///
221/// For union types, returns the type code of the actual member type.
222pub fn effective_type_code(value: &XmlValue) -> XmlTypeCode {
223    match &value.value {
224        XmlValueKind::Union(inner) => effective_type_code(inner),
225        _ => value.type_code,
226    }
227}
228
229/// Check if a value is a node (in XPath terms).
230///
231/// Returns true if the type code indicates a node type.
232pub fn is_node_type(type_code: XmlTypeCode) -> bool {
233    matches!(
234        type_code,
235        XmlTypeCode::Node
236            | XmlTypeCode::Document
237            | XmlTypeCode::Element
238            | XmlTypeCode::Attribute
239            | XmlTypeCode::Namespace
240            | XmlTypeCode::ProcessingInstruction
241            | XmlTypeCode::Comment
242            | XmlTypeCode::Text
243    )
244}
245
246/// Check if a value represents a node.
247pub fn is_node(value: &XmlValue) -> bool {
248    is_node_type(effective_type_code(value))
249}
250
251/// Unwrap a union value to its member value.
252///
253/// Recursively unwraps nested unions.
254pub fn unwrap_union(value: &XmlValue) -> &XmlValue {
255    match &value.value {
256        XmlValueKind::Union(inner) => unwrap_union(inner),
257        _ => value,
258    }
259}
260
261/// Extract the string value of the first node in an XPathValue (XPath 1.0 rule).
262///
263/// In XPath 1.0, converting a node-set to string returns the string-value
264/// of the first node in document order, or "" if empty.
265/// For atomic values, delegates to the standard string conversion.
266pub(crate) fn first_node_string_value<N: DomNavigator>(value: &XPathValue<N>) -> String {
267    match value {
268        XPathValue::Empty => String::new(),
269        XPathValue::Item(XmlItem::Node(n)) => n.value(),
270        XPathValue::Item(XmlItem::Atomic(v)) => v.to_string_value(),
271        XPathValue::Sequence(items) => {
272            // Find the document-order-first node in a single pass
273            let mut first_node: Option<&N> = None;
274            for item in items {
275                if let XmlItem::Node(n) = item {
276                    if let Some(current) = first_node {
277                        if crate::xpath::node_ops::compare_document_order(n, current)
278                            == std::cmp::Ordering::Less
279                        {
280                            first_node = Some(n);
281                        }
282                    } else {
283                        first_node = Some(n);
284                    }
285                }
286            }
287            if let Some(n) = first_node {
288                return n.value();
289            }
290            // Fallback: if no nodes, use first atomic's string value
291            if let Some(XmlItem::Atomic(v)) = items.first() {
292                v.to_string_value()
293            } else {
294                String::new()
295            }
296        }
297    }
298}
299
300/// Convert an XPathValue to string using XPath 1.0 rules.
301///
302/// Same as `first_node_string_value` — for node-sets, uses first node.
303/// For atomics, uses canonical string form.
304pub(crate) fn to_string_10<N: DomNavigator>(value: &XPathValue<N>) -> String {
305    first_node_string_value(value)
306}
307
308/// Convert an XPathValue to number using XPath 1.0 rules.
309///
310/// Converts to string first (via `to_string_10`), then parses as f64.
311pub(crate) fn to_number_10<N: DomNavigator>(value: &XPathValue<N>) -> f64 {
312    match value {
313        XPathValue::Empty => f64::NAN,
314        XPathValue::Item(XmlItem::Atomic(v)) => to_number(v),
315        _ => to_string_10(value).trim().parse().unwrap_or(f64::NAN),
316    }
317}
318
#[cfg(test)]
mod tests {
    use super::*;
    use crate::xpath::RoXmlNavigator;
    use num_bigint::BigInt;
    use rust_decimal::Decimal;

    /// Navigator type for tests that need an `XPathValue` but no document.
    type StaticNav = RoXmlNavigator<'static>;

    /// Shorthand for an `xs:integer` value.
    fn int(n: i64) -> XmlValue {
        XmlValue::integer(BigInt::from(n))
    }

    // --- atomize / atomize_opt / atomize_required ---

    #[test]
    fn test_atomize_atomic() {
        let atomized = atomize(&XmlValue::string("hello")).unwrap();
        assert_eq!(atomized.to_string_value(), "hello");
    }

    #[test]
    fn test_atomize_untyped() {
        let atomized = atomize(&XmlValue::untyped("test")).unwrap();
        assert_eq!(atomized.to_string_value(), "test");
    }

    #[test]
    fn test_atomize_opt_none() {
        assert!(atomize_opt(None).unwrap().is_none());
    }

    #[test]
    fn test_atomize_opt_some() {
        let input = int(42);
        assert!(atomize_opt(Some(&input)).unwrap().is_some());
    }

    #[test]
    fn test_atomize_required_none() {
        let outcome = atomize_required(None);
        assert!(outcome.is_err());
        assert!(
            matches!(outcome, Err(XPathError::XPTY0004 { .. })),
            "Expected XPTY0004 error"
        );
    }

    // --- string conversion ---

    #[test]
    fn test_string_value() {
        assert_eq!(string_value(&XmlValue::string("hello")), "hello");
        assert_eq!(string_value(&XmlValue::boolean(true)), "true");
        assert_eq!(string_value(&int(123)), "123");
    }

    #[test]
    fn test_string_value_opt_none() {
        assert_eq!(string_value_opt(None), "");
    }

    // --- numeric conversion ---

    #[test]
    fn test_to_number() {
        assert_eq!(to_number(&XmlValue::double(2.5)), 2.5);
        assert_eq!(to_number(&XmlValue::float(2.5)), 2.5);
        assert_eq!(to_number(&int(42)), 42.0);
        assert_eq!(to_number(&XmlValue::decimal(Decimal::new(125, 2))), 1.25);
        assert_eq!(to_number(&XmlValue::string("2.5")), 2.5);
        assert!(to_number(&XmlValue::string("not a number")).is_nan());
    }

    #[test]
    fn test_to_number_opt_none() {
        assert!(to_number_opt(None).is_nan());
    }

    #[test]
    fn test_to_number_untyped() {
        assert_eq!(to_number(&XmlValue::untyped("42.5")), 42.5);
        // Surrounding whitespace is trimmed before parsing.
        assert_eq!(to_number(&XmlValue::untyped("  2.5  ")), 2.5);
    }

    // --- type-code helpers ---

    #[test]
    fn test_effective_type_code() {
        assert_eq!(
            effective_type_code(&XmlValue::string("test")),
            XmlTypeCode::String
        );
        assert_eq!(effective_type_code(&int(1)), XmlTypeCode::Integer);
    }

    #[test]
    fn test_is_node_type() {
        for code in [
            XmlTypeCode::Element,
            XmlTypeCode::Attribute,
            XmlTypeCode::Document,
        ] {
            assert!(is_node_type(code));
        }
        for code in [XmlTypeCode::String, XmlTypeCode::Integer] {
            assert!(!is_node_type(code));
        }
    }

    #[test]
    fn test_is_node() {
        // Atomic values are not nodes.
        assert!(!is_node(&XmlValue::string("test")));
        assert!(!is_node(&int(1)));

        // Without a navigator we cannot build a real node, so fake one by
        // attaching a node type code to an untyped payload.
        let fake_element = XmlValue::new(
            XmlTypeCode::Element,
            XmlValueKind::UntypedAtomic("element content".to_string()),
        );
        assert!(is_node(&fake_element));
    }

    // --- XPath 1.0 conversion tests ---

    #[test]
    fn test_first_node_string_value_empty() {
        let empty: XPathValue<StaticNav> = XPathValue::empty();
        assert_eq!(first_node_string_value(&empty), "");
    }

    #[test]
    fn test_first_node_string_value_single_atomic() {
        let atomic: XPathValue<StaticNav> = XPathValue::string("hello");
        assert_eq!(first_node_string_value(&atomic), "hello");
    }

    #[test]
    fn test_first_node_string_value_single_node() {
        let doc = roxmltree::Document::parse("<root>text content</root>").unwrap();
        let mut root = RoXmlNavigator::new(&doc);
        root.move_to_first_child(); // now on <root>
        let node_value = XPathValue::from_node(root);
        assert_eq!(first_node_string_value(&node_value), "text content");
    }

    #[test]
    fn test_first_node_string_value_multi_node_sequence() {
        let doc = roxmltree::Document::parse("<r><a>first</a><b>second</b></r>").unwrap();
        let mut first = RoXmlNavigator::new(&doc);
        first.move_to_first_child(); // now on <r>
        first.move_to_first_child(); // now on <a>
        let mut second = first.clone();
        second.move_to_next_sibling(); // now on <b>

        let sequence =
            XPathValue::from_sequence(vec![XmlItem::Node(first), XmlItem::Node(second)]);
        // XPath 1.0: the first node's string value wins.
        assert_eq!(first_node_string_value(&sequence), "first");
    }

    #[test]
    fn test_to_string_10_delegates() {
        let atomic: XPathValue<StaticNav> = XPathValue::string("abc");
        assert_eq!(to_string_10(&atomic), "abc");
    }

    #[test]
    fn test_to_number_10_empty() {
        let empty: XPathValue<StaticNav> = XPathValue::empty();
        assert!(to_number_10(&empty).is_nan());
    }

    #[test]
    fn test_to_number_10_atomic() {
        let atomic: XPathValue<StaticNav> = XPathValue::double(2.75);
        assert_eq!(to_number_10(&atomic), 2.75);
    }

    #[test]
    fn test_to_number_10_node_numeric() {
        let doc = roxmltree::Document::parse("<n>42.5</n>").unwrap();
        let mut element = RoXmlNavigator::new(&doc);
        element.move_to_first_child(); // now on <n>
        let node_value = XPathValue::from_node(element);
        assert_eq!(to_number_10(&node_value), 42.5);
    }

    #[test]
    fn test_to_number_10_node_non_numeric() {
        let doc = roxmltree::Document::parse("<n>not a number</n>").unwrap();
        let mut element = RoXmlNavigator::new(&doc);
        element.move_to_first_child(); // now on <n>
        let node_value = XPathValue::from_node(element);
        assert!(to_number_10(&node_value).is_nan());
    }
}