ddex_parser/parser/
xpath_selector.rs

1// src/parser/xpath_selector.rs
2//! XPath-like selector functionality for efficient XML element selection
3
4use crate::error::ParseError;
5use quick_xml::{events::Event, Reader};
6use std::collections::HashMap;
7use std::io::BufRead;
8
9/// XPath-like selector for extracting specific XML elements and values
10#[derive(Debug, Clone)]
11pub struct XPathSelector {
12    /// Parsed path components
13    path: Vec<PathComponent>,
14    /// Enable namespace-aware matching
15    namespace_aware: bool,
16    /// Enable case-sensitive matching
17    case_sensitive: bool,
18    /// Maximum results to return (0 = unlimited)
19    max_results: usize,
20    /// Skip validation for better performance
21    fast_mode: bool,
22    /// Pre-compiled element name cache for performance
23    #[allow(dead_code)] // Future optimization feature
24    element_cache: std::collections::HashMap<String, String>,
25}
26
/// One step of a parsed XPath-like expression.
#[derive(Debug, Clone, PartialEq)]
pub enum PathComponent {
    /// A named element step, e.g. `Release`.
    Element(String),
    /// The `*` step, which accepts any element name.
    Wildcard,
    /// The `//` separator (descendant-or-self axis).
    DescendantOrSelf,
    /// A step with an attribute predicate: `element[@attr]` or `element[@attr="value"]`.
    AttributeFilter {
        /// Element name written before the `[`.
        element: String,
        /// Attribute name written after the `@`.
        attribute: String,
        /// Required attribute value, or `None` when the attribute only has to exist.
        value: Option<String>,
    },
    /// A step with a numeric predicate: `element[1]`, `element[2]`, ...
    IndexFilter {
        /// Element name written before the `[`.
        element: String,
        /// The number written between the brackets.
        index: usize,
    },
}
45
/// Result of XPath selection
///
/// `values`, `paths` and `attributes` are filled in lock-step by the selector:
/// entry `i` of each vector describes the same match.
#[derive(Debug, Clone)]
pub struct XPathResult {
    /// Selected values: trimmed text/CDATA content, or for empty elements an
    /// attribute value (empty string when the element has no attributes)
    pub values: Vec<String>,
    /// Element paths where matches were found, as element names joined with `/`
    pub paths: Vec<String>,
    /// Attributes found at matching elements
    pub attributes: Vec<HashMap<String, String>>,
    /// Performance statistics
    pub stats: XPathStats,
}
58
/// Performance statistics for XPath selection
#[derive(Debug, Clone)]
pub struct XPathStats {
    /// Number of start and empty-element events seen by the selector
    pub elements_processed: usize,
    /// Number of values collected (length of the result's `values`)
    pub matches_found: usize,
    /// Reader buffer position when selection finished
    pub bytes_processed: usize,
    /// Wall-clock time measured from the start of the selection call
    pub duration: std::time::Duration,
}
67
68impl XPathSelector {
69    /// Create a new XPath selector
70    pub fn new(xpath: &str) -> Result<Self, ParseError> {
71        let path = Self::parse_xpath(xpath)?;
72
73        Ok(Self {
74            path,
75            namespace_aware: true,
76            case_sensitive: false,
77            max_results: 0,
78            fast_mode: false,
79            element_cache: HashMap::new(),
80        })
81    }
82
83    /// Create selector for common DDEX patterns
84    pub fn ddex_release_titles() -> Result<Self, ParseError> {
85        Self::new("//Release/ReferenceTitle/TitleText")
86    }
87
88    /// Create selector for ISRC extraction
89    pub fn ddex_isrcs() -> Result<Self, ParseError> {
90        Self::new("//SoundRecordingId")
91    }
92
93    /// Create selector for release IDs
94    pub fn ddex_release_ids() -> Result<Self, ParseError> {
95        Self::new("//ReleaseId")
96    }
97
98    /// Set namespace awareness
99    pub fn namespace_aware(mut self, enabled: bool) -> Self {
100        self.namespace_aware = enabled;
101        self
102    }
103
104    /// Set case sensitivity
105    pub fn case_sensitive(mut self, enabled: bool) -> Self {
106        self.case_sensitive = enabled;
107        self
108    }
109
110    /// Set maximum number of results
111    pub fn max_results(mut self, max: usize) -> Self {
112        self.max_results = max;
113        self
114    }
115
116    /// Enable fast mode (skip some validations for better performance)
117    pub fn fast_mode(mut self, enabled: bool) -> Self {
118        self.fast_mode = enabled;
119        self
120    }
121
122    /// Select elements matching the XPath expression
123    pub fn select<R: BufRead>(&self, reader: R) -> Result<XPathResult, ParseError> {
124        let start_time = std::time::Instant::now();
125        let mut xml_reader = Reader::from_reader(reader);
126        xml_reader.config_mut().trim_text(true);
127
128        // Performance optimizations
129        if self.fast_mode {
130            xml_reader.config_mut().check_end_names = false;
131            xml_reader.config_mut().check_comments = false;
132        }
133
134        let mut results = Vec::new();
135        let mut paths = Vec::new();
136        let mut attributes = Vec::new();
137        let mut buf = Vec::new();
138        let mut current_path = Vec::new();
139        let mut capture_context = Vec::new();
140        let mut elements_processed = 0;
141
142        loop {
143            match xml_reader.read_event_into(&mut buf) {
144                Ok(Event::Start(ref e)) => {
145                    elements_processed += 1;
146                    let element_name = self.extract_element_name(e.name().as_ref())?;
147                    current_path.push(element_name.clone());
148
149                    // Extract attributes for potential filtering
150                    let mut attr_map = HashMap::new();
151                    for attr in e.attributes().flatten() {
152                        let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
153                        let value = String::from_utf8_lossy(&attr.value).to_string();
154                        attr_map.insert(key, value);
155                    }
156
157                    // Check if this element matches our selector with attribute filtering
158                    if self.matches_path_with_attributes(&current_path, &attr_map) {
159                        capture_context.push(CaptureContext {
160                            path: current_path.join("/"),
161                            attributes: attr_map,
162                            capture_text: true,
163                        });
164                    }
165                }
166                Ok(Event::End(_)) => {
167                    current_path.pop();
168
169                    // End any active capture
170                    if !capture_context.is_empty() {
171                        capture_context.pop();
172                    }
173                }
174                Ok(Event::Empty(ref e)) => {
175                    elements_processed += 1;
176                    let element_name = self.extract_element_name(e.name().as_ref())?;
177                    current_path.push(element_name);
178
179                    // Check for match on empty element
180                    if self.matches_path(&current_path) {
181                        let mut attr_map = HashMap::new();
182                        for attr in e.attributes().flatten() {
183                            let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
184                            let value = String::from_utf8_lossy(&attr.value).to_string();
185                            attr_map.insert(key, value);
186                        }
187
188                        // For empty elements, capture attribute values or empty string
189                        let value = self.get_main_attribute(&attr_map).unwrap_or_default();
190
191                        results.push(value);
192                        paths.push(current_path.join("/"));
193                        attributes.push(attr_map);
194
195                        if self.max_results > 0 && results.len() >= self.max_results {
196                            break;
197                        }
198                    }
199
200                    current_path.pop();
201                }
202                Ok(Event::Text(ref e)) => {
203                    if !capture_context.is_empty() {
204                        // Use utf8_utils for proper UTF-8 handling
205                        let current_pos = xml_reader.buffer_position() as usize;
206                        let text = crate::utf8_utils::handle_text_node(e, current_pos)?
207                            .trim()
208                            .to_string();
209
210                        if !text.is_empty() {
211                            let context = capture_context.last().unwrap();
212                            results.push(text);
213                            paths.push(context.path.clone());
214                            attributes.push(context.attributes.clone());
215
216                            if self.max_results > 0 && results.len() >= self.max_results {
217                                break;
218                            }
219                        }
220                    }
221                }
222                Ok(Event::CData(ref e)) => {
223                    if !capture_context.is_empty() {
224                        let text = String::from_utf8_lossy(e).trim().to_string();
225                        if !text.is_empty() {
226                            let context = capture_context.last().unwrap();
227                            results.push(text);
228                            paths.push(context.path.clone());
229                            attributes.push(context.attributes.clone());
230
231                            if self.max_results > 0 && results.len() >= self.max_results {
232                                break;
233                            }
234                        }
235                    }
236                }
237                Ok(Event::Eof) => break,
238                Err(e) => {
239                    return Err(ParseError::XmlError {
240                        message: format!("XML parsing error: {}", e),
241                        location: crate::error::ErrorLocation {
242                            line: 0,
243                            column: 0,
244                            byte_offset: Some(xml_reader.buffer_position() as usize),
245                            path: "xpath_selector".to_string(),
246                        },
247                    });
248                }
249                _ => {} // Skip other events
250            }
251            buf.clear();
252        }
253
254        Ok(XPathResult {
255            values: results.clone(),
256            paths,
257            attributes,
258            stats: XPathStats {
259                elements_processed,
260                matches_found: results.len(),
261                bytes_processed: xml_reader.buffer_position() as usize,
262                duration: start_time.elapsed(),
263            },
264        })
265    }
266
267    /// Parse XPath expression into path components
268    fn parse_xpath(xpath: &str) -> Result<Vec<PathComponent>, ParseError> {
269        let mut components = Vec::new();
270        let mut parts = Vec::new();
271
272        // Split by '/' but handle '//' specially
273        let mut chars = xpath.chars().peekable();
274        let mut current = String::new();
275
276        while let Some(ch) = chars.next() {
277            match ch {
278                '/' => {
279                    if chars.peek() == Some(&'/') {
280                        chars.next(); // consume second '/'
281                        if !current.is_empty() {
282                            parts.push(current);
283                            current = String::new();
284                        }
285                        parts.push("//".to_string());
286                    } else if !current.is_empty() {
287                        parts.push(current);
288                        current = String::new();
289                    }
290                }
291                _ => current.push(ch),
292            }
293        }
294
295        if !current.is_empty() {
296            parts.push(current);
297        }
298
299        // Parse each part into components
300        for part in parts {
301            if part.is_empty() {
302                continue;
303            } else if part == "//" {
304                components.push(PathComponent::DescendantOrSelf);
305            } else if part == "*" {
306                components.push(PathComponent::Wildcard);
307            } else if part.contains('[') && part.contains(']') {
308                // Parse attribute or index filters
309                let (element, filter) = Self::parse_filter(&part)?;
310
311                if filter.starts_with('@') {
312                    // Attribute filter [@attr] or [@attr="value"]
313                    let attr_expr = &filter[1..]; // Remove @
314                    if let Some(eq_pos) = attr_expr.find('=') {
315                        let attr_name = attr_expr[..eq_pos].to_string();
316                        let attr_value = attr_expr[eq_pos + 1..]
317                            .trim_matches('"')
318                            .trim_matches('\'')
319                            .to_string();
320                        components.push(PathComponent::AttributeFilter {
321                            element,
322                            attribute: attr_name,
323                            value: Some(attr_value),
324                        });
325                    } else {
326                        components.push(PathComponent::AttributeFilter {
327                            element,
328                            attribute: attr_expr.to_string(),
329                            value: None,
330                        });
331                    }
332                } else if let Ok(index) = filter.parse::<usize>() {
333                    // Index filter [1], [2], etc.
334                    components.push(PathComponent::IndexFilter { element, index });
335                } else {
336                    return Err(ParseError::XmlError {
337                        message: format!("Invalid filter expression: [{}]", filter),
338                        location: crate::error::ErrorLocation {
339                            line: 0,
340                            column: 0,
341                            byte_offset: None,
342                            path: "xpath_parser".to_string(),
343                        },
344                    });
345                }
346            } else {
347                components.push(PathComponent::Element(part));
348            }
349        }
350
351        Ok(components)
352    }
353
354    /// Parse filter expression like "element[filter]"
355    fn parse_filter(input: &str) -> Result<(String, String), ParseError> {
356        if let Some(bracket_start) = input.find('[') {
357            if let Some(bracket_end) = input.rfind(']') {
358                let element = input[..bracket_start].to_string();
359                let filter = input[bracket_start + 1..bracket_end].to_string();
360                return Ok((element, filter));
361            }
362        }
363
364        Err(ParseError::XmlError {
365            message: format!("Invalid filter syntax: {}", input),
366            location: crate::error::ErrorLocation {
367                line: 0,
368                column: 0,
369                byte_offset: None,
370                path: "xpath_parser".to_string(),
371            },
372        })
373    }
374
375    /// Check if current path matches the selector
376    fn matches_path(&self, current: &[String]) -> bool {
377        self.match_components(&self.path, current, 0, 0, &HashMap::new())
378    }
379
380    /// Check if current path matches with attribute filtering
381    fn matches_path_with_attributes(
382        &self,
383        current: &[String],
384        attributes: &HashMap<String, String>,
385    ) -> bool {
386        self.match_components(&self.path, current, 0, 0, attributes)
387    }
388
389    /// Recursively match path components against current path
390    fn match_components(
391        &self,
392        components: &[PathComponent],
393        current: &[String],
394        comp_idx: usize,
395        path_idx: usize,
396        attributes: &HashMap<String, String>,
397    ) -> bool {
398        // If we've matched all components, success
399        if comp_idx >= components.len() {
400            return true;
401        }
402
403        // If we've run out of path but still have components, no match
404        if path_idx >= current.len() {
405            return false;
406        }
407
408        match &components[comp_idx] {
409            PathComponent::Element(name) => {
410                if self.element_matches(name, &current[path_idx]) {
411                    // Exact match, advance both
412                    self.match_components(
413                        components,
414                        current,
415                        comp_idx + 1,
416                        path_idx + 1,
417                        attributes,
418                    )
419                } else {
420                    false
421                }
422            }
423            PathComponent::Wildcard => {
424                // Wildcard matches any element, advance both
425                self.match_components(components, current, comp_idx + 1, path_idx + 1, attributes)
426            }
427            PathComponent::DescendantOrSelf => {
428                // Try matching the next component at any remaining position
429                for i in path_idx..current.len() {
430                    if self.match_components(components, current, comp_idx + 1, i, attributes) {
431                        return true;
432                    }
433                }
434                false
435            }
436            PathComponent::AttributeFilter {
437                element,
438                attribute,
439                value,
440            } => {
441                if self.element_matches(element, &current[path_idx]) {
442                    // Check attribute filtering
443                    if let Some(attr_value) = attributes.get(attribute) {
444                        if let Some(expected_value) = value {
445                            // Attribute must have specific value
446                            if expected_value == attr_value {
447                                self.match_components(
448                                    components,
449                                    current,
450                                    comp_idx + 1,
451                                    path_idx + 1,
452                                    attributes,
453                                )
454                            } else {
455                                false
456                            }
457                        } else {
458                            // Attribute just needs to exist
459                            self.match_components(
460                                components,
461                                current,
462                                comp_idx + 1,
463                                path_idx + 1,
464                                attributes,
465                            )
466                        }
467                    } else {
468                        false // Attribute doesn't exist
469                    }
470                } else {
471                    false
472                }
473            }
474            PathComponent::IndexFilter { element, index } => {
475                if self.element_matches(element, &current[path_idx]) {
476                    // For index filtering, we'd need to count elements at this level
477                    // For now, just match the first occurrence (index 1)
478                    if *index == 1 {
479                        self.match_components(
480                            components,
481                            current,
482                            comp_idx + 1,
483                            path_idx + 1,
484                            attributes,
485                        )
486                    } else {
487                        // More sophisticated index tracking would be needed
488                        self.match_components(
489                            components,
490                            current,
491                            comp_idx + 1,
492                            path_idx + 1,
493                            attributes,
494                        )
495                    }
496                } else {
497                    false
498                }
499            }
500        }
501    }
502
503    /// Check if element name matches, considering namespace and case sensitivity
504    fn element_matches(&self, pattern: &str, actual: &str) -> bool {
505        let actual_local = if self.namespace_aware {
506            // Extract local name after ':'
507            actual.split(':').next_back().unwrap_or(actual)
508        } else {
509            actual
510        };
511
512        if self.case_sensitive {
513            pattern == actual_local
514        } else {
515            pattern.eq_ignore_ascii_case(actual_local)
516        }
517    }
518
519    /// Extract element name from QName bytes
520    fn extract_element_name(&self, qname: &[u8]) -> Result<String, ParseError> {
521        let name_str = std::str::from_utf8(qname).map_err(|_| ParseError::Io {
522            message: "Invalid UTF-8 in element name".to_string(),
523        })?;
524
525        Ok(name_str.to_string())
526    }
527
528    /// Get main attribute value (common patterns like Namespace, value, etc.)
529    fn get_main_attribute(&self, attributes: &HashMap<String, String>) -> Option<String> {
530        // Try common attribute names
531        for attr_name in &["value", "Namespace", "id", "ref"] {
532            if let Some(value) = attributes.get(*attr_name) {
533                return Some(value.clone());
534            }
535        }
536
537        // Return first attribute value if any
538        attributes.values().next().cloned()
539    }
540}
541
/// Context for capturing element content
///
/// One instance is pushed for each start tag that matches a selector; text
/// found while it is active is reported with this path and these attributes.
#[derive(Debug, Clone)]
struct CaptureContext {
    /// Path of the matched element, element names joined with `/`
    path: String,
    /// Attributes of the matched element's start tag
    attributes: HashMap<String, String>,
    /// Always set to `true` where contexts are created; not read yet
    #[allow(dead_code)] // Future text capture feature
    capture_text: bool,
}
550
551/// Convenience functions for common DDEX XPath patterns
552impl XPathSelector {
553    /// Select all release titles
554    pub fn select_release_titles<R: BufRead>(reader: R) -> Result<Vec<String>, ParseError> {
555        let selector = Self::ddex_release_titles()?;
556        let result = selector.select(reader)?;
557        Ok(result.values)
558    }
559
560    /// Select all ISRCs
561    pub fn select_isrcs<R: BufRead>(reader: R) -> Result<Vec<String>, ParseError> {
562        let selector = Self::ddex_isrcs()?;
563        let result = selector.select(reader)?;
564        Ok(result.values)
565    }
566
567    /// Select elements with custom XPath
568    pub fn select_with_xpath<R: BufRead>(
569        reader: R,
570        xpath: &str,
571    ) -> Result<Vec<String>, ParseError> {
572        let selector = Self::new(xpath)?;
573        let result = selector.select(reader)?;
574        Ok(result.values)
575    }
576
577    /// High-performance batch selection for multiple XPath expressions
578    pub fn select_multiple<R: BufRead>(
579        reader: R,
580        xpaths: &[&str],
581    ) -> Result<Vec<Vec<String>>, ParseError> {
582        let mut selectors = Vec::new();
583        for xpath in xpaths {
584            selectors.push(Self::new(xpath)?.fast_mode(true));
585        }
586
587        let mut xml_reader = Reader::from_reader(reader);
588        xml_reader.config_mut().trim_text(true);
589        xml_reader.config_mut().check_end_names = false;
590        xml_reader.config_mut().check_comments = false;
591
592        let mut all_results: Vec<Vec<String>> = vec![Vec::new(); selectors.len()];
593        let mut buf = Vec::new();
594        let mut current_path = Vec::new();
595        let mut capture_contexts: Vec<Vec<CaptureContext>> = vec![Vec::new(); selectors.len()];
596
597        loop {
598            match xml_reader.read_event_into(&mut buf) {
599                Ok(Event::Start(ref e)) => {
600                    let element_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
601                    current_path.push(element_name.clone());
602
603                    // Extract attributes once for all selectors
604                    let mut attr_map = HashMap::new();
605                    for attr in e.attributes().flatten() {
606                        let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
607                        let value = String::from_utf8_lossy(&attr.value).to_string();
608                        attr_map.insert(key, value);
609                    }
610
611                    // Check against all selectors
612                    for (i, selector) in selectors.iter().enumerate() {
613                        if selector.matches_path_with_attributes(&current_path, &attr_map) {
614                            capture_contexts[i].push(CaptureContext {
615                                path: current_path.join("/"),
616                                attributes: attr_map.clone(),
617                                capture_text: true,
618                            });
619                        }
620                    }
621                }
622                Ok(Event::End(_)) => {
623                    current_path.pop();
624                    for contexts in &mut capture_contexts {
625                        if !contexts.is_empty() {
626                            contexts.pop();
627                        }
628                    }
629                }
630                Ok(Event::Text(ref e)) => {
631                    // Use utf8_utils for proper UTF-8 handling
632                    let current_pos = xml_reader.buffer_position() as usize;
633                    let text = crate::utf8_utils::handle_text_node(e, current_pos)?
634                        .trim()
635                        .to_string();
636
637                    if !text.is_empty() {
638                        for (i, contexts) in capture_contexts.iter().enumerate() {
639                            if !contexts.is_empty() {
640                                all_results[i].push(text.clone());
641                            }
642                        }
643                    }
644                }
645                Ok(Event::Eof) => break,
646                Err(e) => {
647                    return Err(ParseError::XmlError {
648                        message: format!("XML parsing error: {}", e),
649                        location: crate::error::ErrorLocation {
650                            line: 0,
651                            column: 0,
652                            byte_offset: Some(xml_reader.buffer_position() as usize),
653                            path: "xpath_batch_selector".to_string(),
654                        },
655                    });
656                }
657                _ => {}
658            }
659            buf.clear();
660        }
661
662        Ok(all_results)
663    }
664}
665
666#[cfg(test)]
667mod tests {
668    use super::*;
669    use std::io::Cursor;
670
671    #[test]
672    fn test_xpath_parsing() {
673        let selector = XPathSelector::new("//Release/Title").expect("Valid XPath");
674        assert_eq!(selector.path.len(), 3);
675
676        match &selector.path[0] {
677            PathComponent::DescendantOrSelf => {}
678            _ => panic!("Expected DescendantOrSelf"),
679        }
680
681        match &selector.path[1] {
682            PathComponent::Element(name) => assert_eq!(name, "Release"),
683            _ => panic!("Expected Element(Release)"),
684        }
685    }
686
687    #[test]
688    fn test_simple_element_selection() {
689        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
690        <ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
691            <ern:MessageHeader>
692                <ern:MessageId>MSG001</ern:MessageId>
693            </ern:MessageHeader>
694            <ern:ReleaseList>
695                <ern:Release>
696                    <ern:ReleaseId>REL001</ern:ReleaseId>
697                    <ern:ReferenceTitle>
698                        <ern:TitleText>My Album Title</ern:TitleText>
699                    </ern:ReferenceTitle>
700                </ern:Release>
701            </ern:ReleaseList>
702        </ern:NewReleaseMessage>"#;
703
704        let cursor = Cursor::new(xml.as_bytes());
705        let selector = XPathSelector::new("//TitleText").expect("Valid XPath");
706        let result = selector.select(cursor).expect("Selection should work");
707
708        assert_eq!(result.values.len(), 1);
709        assert_eq!(result.values[0], "My Album Title");
710        assert!(result.stats.elements_processed > 0);
711    }
712
713    #[test]
714    fn test_wildcard_selection() {
715        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
716        <root>
717            <section>
718                <item>Value 1</item>
719                <item>Value 2</item>
720            </section>
721            <section>
722                <item>Value 3</item>
723            </section>
724        </root>"#;
725
726        let cursor = Cursor::new(xml.as_bytes());
727        let selector = XPathSelector::new("//*/item").expect("Valid XPath");
728        let result = selector.select(cursor).expect("Selection should work");
729
730        assert_eq!(result.values.len(), 3);
731        assert!(result.values.contains(&"Value 1".to_string()));
732        assert!(result.values.contains(&"Value 2".to_string()));
733        assert!(result.values.contains(&"Value 3".to_string()));
734    }
735
736    #[test]
737    fn test_descendant_or_self() {
738        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
739        <root>
740            <level1>
741                <level2>
742                    <target>Deep Value</target>
743                </level2>
744            </level1>
745            <target>Shallow Value</target>
746        </root>"#;
747
748        let cursor = Cursor::new(xml.as_bytes());
749        let selector = XPathSelector::new("//target").expect("Valid XPath");
750        let result = selector.select(cursor).expect("Selection should work");
751
752        assert_eq!(result.values.len(), 2);
753        assert!(result.values.contains(&"Deep Value".to_string()));
754        assert!(result.values.contains(&"Shallow Value".to_string()));
755    }
756
757    #[test]
758    fn test_max_results_limit() {
759        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
760        <root>
761            <item>1</item>
762            <item>2</item>
763            <item>3</item>
764            <item>4</item>
765            <item>5</item>
766        </root>"#;
767
768        let cursor = Cursor::new(xml.as_bytes());
769        let selector = XPathSelector::new("//item")
770            .expect("Valid XPath")
771            .max_results(3);
772        let result = selector.select(cursor).expect("Selection should work");
773
774        assert_eq!(result.values.len(), 3);
775        assert_eq!(result.stats.matches_found, 3);
776    }
777
778    #[test]
779    fn test_namespace_awareness() {
780        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
781        <ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
782            <ern:Release>
783                <ern:ReleaseId>REL001</ern:ReleaseId>
784            </ern:Release>
785        </ern:NewReleaseMessage>"#;
786
787        // With namespace awareness (default)
788        let cursor1 = Cursor::new(xml.as_bytes());
789        let selector1 = XPathSelector::new("//ReleaseId")
790            .expect("Valid XPath")
791            .namespace_aware(true);
792        let result1 = selector1.select(cursor1).expect("Selection should work");
793        assert_eq!(result1.values.len(), 1);
794
795        // Without namespace awareness
796        let cursor2 = Cursor::new(xml.as_bytes());
797        let selector2 = XPathSelector::new("//ReleaseId")
798            .expect("Valid XPath")
799            .namespace_aware(false);
800        let result2 = selector2.select(cursor2).expect("Selection should work");
801        assert_eq!(result2.values.len(), 0); // Won't match ern:ReleaseId
802    }
803
804    #[test]
805    fn test_case_sensitivity() {
806        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
807        <root>
808            <ReleaseId>REL001</ReleaseId>
809            <releaseid>REL002</releaseid>
810        </root>"#;
811
812        // Case insensitive (default)
813        let cursor1 = Cursor::new(xml.as_bytes());
814        let selector1 = XPathSelector::new("//releaseid")
815            .expect("Valid XPath")
816            .case_sensitive(false);
817        let result1 = selector1.select(cursor1).expect("Selection should work");
818        assert_eq!(result1.values.len(), 2); // Matches both
819
820        // Case sensitive
821        let cursor2 = Cursor::new(xml.as_bytes());
822        let selector2 = XPathSelector::new("//releaseid")
823            .expect("Valid XPath")
824            .case_sensitive(true);
825        let result2 = selector2.select(cursor2).expect("Selection should work");
826        assert_eq!(result2.values.len(), 1); // Only exact match
827    }
828
829    #[test]
830    fn test_ddex_convenience_methods() {
831        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
832        <ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
833            <ern:ReleaseList>
834                <ern:Release>
835                    <ern:ReferenceTitle>
836                        <ern:TitleText>Album Title</ern:TitleText>
837                    </ern:ReferenceTitle>
838                </ern:Release>
839            </ern:ReleaseList>
840            <ern:ResourceList>
841                <ern:SoundRecording>
842                    <ern:SoundRecordingId Namespace="ISRC">USRC17607839</ern:SoundRecordingId>
843                </ern:SoundRecording>
844            </ern:ResourceList>
845        </ern:NewReleaseMessage>"#;
846
847        // Test release titles
848        let cursor1 = Cursor::new(xml.as_bytes());
849        let titles = XPathSelector::select_release_titles(cursor1).expect("Should find titles");
850        assert_eq!(titles.len(), 1);
851        assert_eq!(titles[0], "Album Title");
852
853        // Test ISRCs
854        let cursor2 = Cursor::new(xml.as_bytes());
855        let isrcs = XPathSelector::select_isrcs(cursor2).expect("Should find ISRCs");
856        assert_eq!(isrcs.len(), 1);
857        assert_eq!(isrcs[0], "USRC17607839");
858    }
859
860    #[test]
861    fn test_empty_elements_with_attributes() {
862        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
863        <root>
864            <element value="test1"/>
865            <element value="test2">content</element>
866            <element/>
867        </root>"#;
868
869        let cursor = Cursor::new(xml.as_bytes());
870        let selector = XPathSelector::new("//element").expect("Valid XPath");
871        let result = selector.select(cursor).expect("Selection should work");
872
873        // Should find 3 elements: 2 with values, 1 with content
874        assert_eq!(result.values.len(), 3);
875
876        // Check that we captured both attribute values and text content
877        assert!(result
878            .values
879            .iter()
880            .any(|v| v == "test1" || v == "test2" || v == "content"));
881    }
882
883    #[test]
884    fn test_performance_stats() {
885        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
886        <root>
887            <item>1</item>
888            <item>2</item>
889            <item>3</item>
890        </root>"#;
891
892        let cursor = Cursor::new(xml.as_bytes());
893        let selector = XPathSelector::new("//item").expect("Valid XPath");
894        let result = selector.select(cursor).expect("Selection should work");
895
896        assert_eq!(result.stats.matches_found, 3);
897        assert!(result.stats.elements_processed >= 4); // root + 3 items
898        assert!(result.stats.bytes_processed > 0);
899        assert!(result.stats.duration.as_nanos() > 0);
900    }
901
902    #[test]
903    fn test_attribute_filtering() {
904        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
905        <root>
906            <item type="audio">Audio Item</item>
907            <item type="video">Video Item</item>
908            <item>No Type</item>
909        </root>"#;
910
911        // Test attribute existence
912        let cursor1 = Cursor::new(xml.as_bytes());
913        let selector1 = XPathSelector::new("//item[@type]").expect("Valid XPath");
914        let result1 = selector1.select(cursor1).expect("Selection should work");
915        assert_eq!(result1.values.len(), 2); // Only items with type attribute
916
917        // Test attribute value matching
918        let cursor2 = Cursor::new(xml.as_bytes());
919        let selector2 = XPathSelector::new("//item[@type='audio']").expect("Valid XPath");
920        let result2 = selector2.select(cursor2).expect("Selection should work");
921        assert_eq!(result2.values.len(), 1);
922        assert_eq!(result2.values[0], "Audio Item");
923    }
924}