ddex_parser/parser/
xpath_selector.rs

1// src/parser/xpath_selector.rs
2//! XPath-like selector functionality for efficient XML element selection
3
4use crate::error::ParseError;
5use quick_xml::{events::Event, Reader};
6use std::collections::HashMap;
7use std::io::BufRead;
8
9/// XPath-like selector for extracting specific XML elements and values
10#[derive(Debug, Clone)]
11pub struct XPathSelector {
12    /// Parsed path components
13    path: Vec<PathComponent>,
14    /// Enable namespace-aware matching
15    namespace_aware: bool,
16    /// Enable case-sensitive matching
17    case_sensitive: bool,
18    /// Maximum results to return (0 = unlimited)
19    max_results: usize,
20    /// Skip validation for better performance
21    fast_mode: bool,
22    /// Pre-compiled element name cache for performance
23    #[allow(dead_code)] // Future optimization feature
24    element_cache: std::collections::HashMap<String, String>,
25}
26
/// One step of a parsed XPath-like expression.
///
/// Every payload is `Eq`, so the enum derives `Eq` alongside `PartialEq`;
/// this lets parsed paths be compared and used wherever full equality is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathComponent {
    /// A named element step (e.g., `Release`).
    Element(String),
    /// `*`: a step that accepts any element name.
    Wildcard,
    /// `//`: the following step may start at any depth from this point on.
    DescendantOrSelf,
    /// `element[@attribute]` or `element[@attribute="value"]`.
    AttributeFilter {
        /// Name of the element the filter is attached to.
        element: String,
        /// Attribute that must be present on the element.
        attribute: String,
        /// Required attribute value; `None` only requires the attribute to exist.
        value: Option<String>,
    },
    /// `element[n]`: a named step carrying a position (`[1]`, `[2]`, ...).
    ///
    /// NOTE: the matcher in this file only checks the element name for this
    /// component; the position itself is not enforced yet.
    IndexFilter {
        /// Name of the element the position applies to.
        element: String,
        /// Position as written in the expression.
        index: usize,
    },
}
45
/// Result of XPath selection
///
/// `values`, `paths` and `attributes` are parallel vectors: `select` pushes
/// one entry onto each of them for every value it records, so index `i`
/// refers to the same match in all three.
#[derive(Debug, Clone)]
pub struct XPathResult {
    /// Selected values (trimmed text content, or an attribute value / empty
    /// string for self-closing elements)
    pub values: Vec<String>,
    /// Element paths where matches were found, as element names joined by `/`
    pub paths: Vec<String>,
    /// Attributes found at matching elements
    pub attributes: Vec<HashMap<String, String>>,
    /// Performance statistics
    pub stats: XPathStats,
}
58
/// Performance statistics for XPath selection
#[derive(Debug, Clone)]
pub struct XPathStats {
    /// Number of start tags and self-closing elements read from the input
    pub elements_processed: usize,
    /// Number of values recorded in the result
    pub matches_found: usize,
    /// Reader position (in bytes) at the moment selection stopped
    pub bytes_processed: usize,
    /// Time elapsed between the start of `select` and building the result
    pub duration: std::time::Duration,
}
67
68impl XPathSelector {
69    /// Create a new XPath selector
70    pub fn new(xpath: &str) -> Result<Self, ParseError> {
71        let path = Self::parse_xpath(xpath)?;
72
73        Ok(Self {
74            path,
75            namespace_aware: true,
76            case_sensitive: false,
77            max_results: 0,
78            fast_mode: false,
79            element_cache: HashMap::new(),
80        })
81    }
82
83    /// Create selector for common DDEX patterns
84    pub fn ddex_release_titles() -> Result<Self, ParseError> {
85        Self::new("//Release/ReferenceTitle/TitleText")
86    }
87
88    /// Create selector for ISRC extraction
89    pub fn ddex_isrcs() -> Result<Self, ParseError> {
90        Self::new("//SoundRecordingId")
91    }
92
93    /// Create selector for release IDs
94    pub fn ddex_release_ids() -> Result<Self, ParseError> {
95        Self::new("//ReleaseId")
96    }
97
98    /// Set namespace awareness
99    pub fn namespace_aware(mut self, enabled: bool) -> Self {
100        self.namespace_aware = enabled;
101        self
102    }
103
104    /// Set case sensitivity
105    pub fn case_sensitive(mut self, enabled: bool) -> Self {
106        self.case_sensitive = enabled;
107        self
108    }
109
110    /// Set maximum number of results
111    pub fn max_results(mut self, max: usize) -> Self {
112        self.max_results = max;
113        self
114    }
115
116    /// Enable fast mode (skip some validations for better performance)
117    pub fn fast_mode(mut self, enabled: bool) -> Self {
118        self.fast_mode = enabled;
119        self
120    }
121
122    /// Select elements matching the XPath expression
123    pub fn select<R: BufRead>(&self, reader: R) -> Result<XPathResult, ParseError> {
124        let start_time = std::time::Instant::now();
125        let mut xml_reader = Reader::from_reader(reader);
126        xml_reader.config_mut().trim_text(true);
127
128        // Performance optimizations
129        if self.fast_mode {
130            xml_reader.config_mut().check_end_names = false;
131            xml_reader.config_mut().check_comments = false;
132        }
133
134        let mut results = Vec::new();
135        let mut paths = Vec::new();
136        let mut attributes = Vec::new();
137        let mut buf = Vec::new();
138        let mut current_path = Vec::new();
139        let mut capture_context = Vec::new();
140        let mut elements_processed = 0;
141
142        loop {
143            match xml_reader.read_event_into(&mut buf) {
144                Ok(Event::Start(ref e)) => {
145                    elements_processed += 1;
146                    let element_name = self.extract_element_name(e.name().as_ref())?;
147                    current_path.push(element_name.clone());
148
149                    // Extract attributes for potential filtering
150                    let mut attr_map = HashMap::new();
151                    for attr in e.attributes().flatten() {
152                        let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
153                        let value = String::from_utf8_lossy(&attr.value).to_string();
154                        attr_map.insert(key, value);
155                    }
156
157                    // Check if this element matches our selector with attribute filtering
158                    if self.matches_path_with_attributes(&current_path, &attr_map) {
159                        capture_context.push(CaptureContext {
160                            path: current_path.join("/"),
161                            attributes: attr_map,
162                            capture_text: true,
163                        });
164                    }
165                }
166                Ok(Event::End(_)) => {
167                    current_path.pop();
168
169                    // End any active capture
170                    if !capture_context.is_empty() {
171                        capture_context.pop();
172                    }
173                }
174                Ok(Event::Empty(ref e)) => {
175                    elements_processed += 1;
176                    let element_name = self.extract_element_name(e.name().as_ref())?;
177                    current_path.push(element_name);
178
179                    // Check for match on empty element
180                    if self.matches_path(&current_path) {
181                        let mut attr_map = HashMap::new();
182                        for attr in e.attributes().flatten() {
183                            let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
184                            let value = String::from_utf8_lossy(&attr.value).to_string();
185                            attr_map.insert(key, value);
186                        }
187
188                        // For empty elements, capture attribute values or empty string
189                        let value = self.get_main_attribute(&attr_map).unwrap_or_default();
190
191                        results.push(value);
192                        paths.push(current_path.join("/"));
193                        attributes.push(attr_map);
194
195                        if self.max_results > 0 && results.len() >= self.max_results {
196                            break;
197                        }
198                    }
199
200                    current_path.pop();
201                }
202                Ok(Event::Text(ref e)) => {
203                    if !capture_context.is_empty() {
204                        // Use utf8_utils for proper UTF-8 handling
205                        let current_pos = xml_reader.buffer_position() as usize;
206                        let text = crate::utf8_utils::handle_text_node(e, current_pos)?
207                            .trim()
208                            .to_string();
209
210                        if !text.is_empty() {
211                            let context = capture_context.last().unwrap();
212                            results.push(text);
213                            paths.push(context.path.clone());
214                            attributes.push(context.attributes.clone());
215
216                            if self.max_results > 0 && results.len() >= self.max_results {
217                                break;
218                            }
219                        }
220                    }
221                }
222                Ok(Event::CData(ref e)) => {
223                    if !capture_context.is_empty() {
224                        let text = String::from_utf8_lossy(e).trim().to_string();
225                        if !text.is_empty() {
226                            let context = capture_context.last().unwrap();
227                            results.push(text);
228                            paths.push(context.path.clone());
229                            attributes.push(context.attributes.clone());
230
231                            if self.max_results > 0 && results.len() >= self.max_results {
232                                break;
233                            }
234                        }
235                    }
236                }
237                Ok(Event::Eof) => break,
238                Err(e) => {
239                    return Err(ParseError::XmlError(format!("XML parsing error: {}", e)));
240                }
241                _ => {} // Skip other events
242            }
243            buf.clear();
244        }
245
246        Ok(XPathResult {
247            values: results.clone(),
248            paths,
249            attributes,
250            stats: XPathStats {
251                elements_processed,
252                matches_found: results.len(),
253                bytes_processed: xml_reader.buffer_position() as usize,
254                duration: start_time.elapsed(),
255            },
256        })
257    }
258
259    /// Parse XPath expression into path components
260    fn parse_xpath(xpath: &str) -> Result<Vec<PathComponent>, ParseError> {
261        let mut components = Vec::new();
262        let mut parts = Vec::new();
263
264        // Split by '/' but handle '//' specially
265        let mut chars = xpath.chars().peekable();
266        let mut current = String::new();
267
268        while let Some(ch) = chars.next() {
269            match ch {
270                '/' => {
271                    if chars.peek() == Some(&'/') {
272                        chars.next(); // consume second '/'
273                        if !current.is_empty() {
274                            parts.push(current);
275                            current = String::new();
276                        }
277                        parts.push("//".to_string());
278                    } else if !current.is_empty() {
279                        parts.push(current);
280                        current = String::new();
281                    }
282                }
283                _ => current.push(ch),
284            }
285        }
286
287        if !current.is_empty() {
288            parts.push(current);
289        }
290
291        // Parse each part into components
292        for part in parts {
293            if part.is_empty() {
294                continue;
295            } else if part == "//" {
296                components.push(PathComponent::DescendantOrSelf);
297            } else if part == "*" {
298                components.push(PathComponent::Wildcard);
299            } else if part.contains('[') && part.contains(']') {
300                // Parse attribute or index filters
301                let (element, filter) = Self::parse_filter(&part)?;
302
303                if filter.starts_with('@') {
304                    // Attribute filter [@attr] or [@attr="value"]
305                    let attr_expr = &filter[1..]; // Remove @
306                    if let Some(eq_pos) = attr_expr.find('=') {
307                        let attr_name = attr_expr[..eq_pos].to_string();
308                        let attr_value = attr_expr[eq_pos + 1..]
309                            .trim_matches('"')
310                            .trim_matches('\'')
311                            .to_string();
312                        components.push(PathComponent::AttributeFilter {
313                            element,
314                            attribute: attr_name,
315                            value: Some(attr_value),
316                        });
317                    } else {
318                        components.push(PathComponent::AttributeFilter {
319                            element,
320                            attribute: attr_expr.to_string(),
321                            value: None,
322                        });
323                    }
324                } else if let Ok(index) = filter.parse::<usize>() {
325                    // Index filter [1], [2], etc.
326                    components.push(PathComponent::IndexFilter { element, index });
327                } else {
328                    return Err(ParseError::XmlError(format!("Invalid filter expression: [{}]", filter)));
329                }
330            } else {
331                components.push(PathComponent::Element(part));
332            }
333        }
334
335        Ok(components)
336    }
337
338    /// Parse filter expression like "element[filter]"
339    fn parse_filter(input: &str) -> Result<(String, String), ParseError> {
340        if let Some(bracket_start) = input.find('[') {
341            if let Some(bracket_end) = input.rfind(']') {
342                let element = input[..bracket_start].to_string();
343                let filter = input[bracket_start + 1..bracket_end].to_string();
344                return Ok((element, filter));
345            }
346        }
347
348        Err(ParseError::XmlError(format!("Invalid filter syntax: {}", input)))
349    }
350
351    /// Check if current path matches the selector
352    fn matches_path(&self, current: &[String]) -> bool {
353        self.match_components(&self.path, current, 0, 0, &HashMap::new())
354    }
355
356    /// Check if current path matches with attribute filtering
357    fn matches_path_with_attributes(
358        &self,
359        current: &[String],
360        attributes: &HashMap<String, String>,
361    ) -> bool {
362        self.match_components(&self.path, current, 0, 0, attributes)
363    }
364
365    /// Recursively match path components against current path
366    fn match_components(
367        &self,
368        components: &[PathComponent],
369        current: &[String],
370        comp_idx: usize,
371        path_idx: usize,
372        attributes: &HashMap<String, String>,
373    ) -> bool {
374        // If we've matched all components, success
375        if comp_idx >= components.len() {
376            return true;
377        }
378
379        // If we've run out of path but still have components, no match
380        if path_idx >= current.len() {
381            return false;
382        }
383
384        match &components[comp_idx] {
385            PathComponent::Element(name) => {
386                if self.element_matches(name, &current[path_idx]) {
387                    // Exact match, advance both
388                    self.match_components(
389                        components,
390                        current,
391                        comp_idx + 1,
392                        path_idx + 1,
393                        attributes,
394                    )
395                } else {
396                    false
397                }
398            }
399            PathComponent::Wildcard => {
400                // Wildcard matches any element, advance both
401                self.match_components(components, current, comp_idx + 1, path_idx + 1, attributes)
402            }
403            PathComponent::DescendantOrSelf => {
404                // Try matching the next component at any remaining position
405                for i in path_idx..current.len() {
406                    if self.match_components(components, current, comp_idx + 1, i, attributes) {
407                        return true;
408                    }
409                }
410                false
411            }
412            PathComponent::AttributeFilter {
413                element,
414                attribute,
415                value,
416            } => {
417                if self.element_matches(element, &current[path_idx]) {
418                    // Check attribute filtering
419                    if let Some(attr_value) = attributes.get(attribute) {
420                        if let Some(expected_value) = value {
421                            // Attribute must have specific value
422                            if expected_value == attr_value {
423                                self.match_components(
424                                    components,
425                                    current,
426                                    comp_idx + 1,
427                                    path_idx + 1,
428                                    attributes,
429                                )
430                            } else {
431                                false
432                            }
433                        } else {
434                            // Attribute just needs to exist
435                            self.match_components(
436                                components,
437                                current,
438                                comp_idx + 1,
439                                path_idx + 1,
440                                attributes,
441                            )
442                        }
443                    } else {
444                        false // Attribute doesn't exist
445                    }
446                } else {
447                    false
448                }
449            }
450            PathComponent::IndexFilter { element, index } => {
451                if self.element_matches(element, &current[path_idx]) {
452                    // For index filtering, we'd need to count elements at this level
453                    // For now, just match the first occurrence (index 1)
454                    if *index == 1 {
455                        self.match_components(
456                            components,
457                            current,
458                            comp_idx + 1,
459                            path_idx + 1,
460                            attributes,
461                        )
462                    } else {
463                        // More sophisticated index tracking would be needed
464                        self.match_components(
465                            components,
466                            current,
467                            comp_idx + 1,
468                            path_idx + 1,
469                            attributes,
470                        )
471                    }
472                } else {
473                    false
474                }
475            }
476        }
477    }
478
479    /// Check if element name matches, considering namespace and case sensitivity
480    fn element_matches(&self, pattern: &str, actual: &str) -> bool {
481        let actual_local = if self.namespace_aware {
482            // Extract local name after ':'
483            actual.split(':').next_back().unwrap_or(actual)
484        } else {
485            actual
486        };
487
488        if self.case_sensitive {
489            pattern == actual_local
490        } else {
491            pattern.eq_ignore_ascii_case(actual_local)
492        }
493    }
494
495    /// Extract element name from QName bytes
496    fn extract_element_name(&self, qname: &[u8]) -> Result<String, ParseError> {
497        let name_str = std::str::from_utf8(qname).map_err(|_| ParseError::IoError(
498            "Invalid UTF-8 in element name".to_string(),
499        ))?;
500
501        Ok(name_str.to_string())
502    }
503
504    /// Get main attribute value (common patterns like Namespace, value, etc.)
505    fn get_main_attribute(&self, attributes: &HashMap<String, String>) -> Option<String> {
506        // Try common attribute names
507        for attr_name in &["value", "Namespace", "id", "ref"] {
508            if let Some(value) = attributes.get(*attr_name) {
509                return Some(value.clone());
510            }
511        }
512
513        // Return first attribute value if any
514        attributes.values().next().cloned()
515    }
516}
517
/// Context for capturing element content
///
/// One value is pushed for every start tag that matches a selector; text read
/// while it is active is reported with this path and these attributes.
#[derive(Debug, Clone)]
struct CaptureContext {
    /// Path of the element that opened the capture, names joined by `/`
    path: String,
    /// Attributes of the element that opened the capture
    attributes: HashMap<String, String>,
    /// Always set to `true` where contexts are created; not read anywhere yet
    #[allow(dead_code)] // Future text capture feature
    capture_text: bool,
}
526
527/// Convenience functions for common DDEX XPath patterns
528impl XPathSelector {
529    /// Select all release titles
530    pub fn select_release_titles<R: BufRead>(reader: R) -> Result<Vec<String>, ParseError> {
531        let selector = Self::ddex_release_titles()?;
532        let result = selector.select(reader)?;
533        Ok(result.values)
534    }
535
536    /// Select all ISRCs
537    pub fn select_isrcs<R: BufRead>(reader: R) -> Result<Vec<String>, ParseError> {
538        let selector = Self::ddex_isrcs()?;
539        let result = selector.select(reader)?;
540        Ok(result.values)
541    }
542
543    /// Select elements with custom XPath
544    pub fn select_with_xpath<R: BufRead>(
545        reader: R,
546        xpath: &str,
547    ) -> Result<Vec<String>, ParseError> {
548        let selector = Self::new(xpath)?;
549        let result = selector.select(reader)?;
550        Ok(result.values)
551    }
552
553    /// High-performance batch selection for multiple XPath expressions
554    pub fn select_multiple<R: BufRead>(
555        reader: R,
556        xpaths: &[&str],
557    ) -> Result<Vec<Vec<String>>, ParseError> {
558        let mut selectors = Vec::new();
559        for xpath in xpaths {
560            selectors.push(Self::new(xpath)?.fast_mode(true));
561        }
562
563        let mut xml_reader = Reader::from_reader(reader);
564        xml_reader.config_mut().trim_text(true);
565        xml_reader.config_mut().check_end_names = false;
566        xml_reader.config_mut().check_comments = false;
567
568        let mut all_results: Vec<Vec<String>> = vec![Vec::new(); selectors.len()];
569        let mut buf = Vec::new();
570        let mut current_path = Vec::new();
571        let mut capture_contexts: Vec<Vec<CaptureContext>> = vec![Vec::new(); selectors.len()];
572
573        loop {
574            match xml_reader.read_event_into(&mut buf) {
575                Ok(Event::Start(ref e)) => {
576                    let element_name = String::from_utf8_lossy(e.name().as_ref()).to_string();
577                    current_path.push(element_name.clone());
578
579                    // Extract attributes once for all selectors
580                    let mut attr_map = HashMap::new();
581                    for attr in e.attributes().flatten() {
582                        let key = String::from_utf8_lossy(attr.key.as_ref()).to_string();
583                        let value = String::from_utf8_lossy(&attr.value).to_string();
584                        attr_map.insert(key, value);
585                    }
586
587                    // Check against all selectors
588                    for (i, selector) in selectors.iter().enumerate() {
589                        if selector.matches_path_with_attributes(&current_path, &attr_map) {
590                            capture_contexts[i].push(CaptureContext {
591                                path: current_path.join("/"),
592                                attributes: attr_map.clone(),
593                                capture_text: true,
594                            });
595                        }
596                    }
597                }
598                Ok(Event::End(_)) => {
599                    current_path.pop();
600                    for contexts in &mut capture_contexts {
601                        if !contexts.is_empty() {
602                            contexts.pop();
603                        }
604                    }
605                }
606                Ok(Event::Text(ref e)) => {
607                    // Use utf8_utils for proper UTF-8 handling
608                    let current_pos = xml_reader.buffer_position() as usize;
609                    let text = crate::utf8_utils::handle_text_node(e, current_pos)?
610                        .trim()
611                        .to_string();
612
613                    if !text.is_empty() {
614                        for (i, contexts) in capture_contexts.iter().enumerate() {
615                            if !contexts.is_empty() {
616                                all_results[i].push(text.clone());
617                            }
618                        }
619                    }
620                }
621                Ok(Event::Eof) => break,
622                Err(e) => {
623                    return Err(ParseError::XmlError(format!("XML parsing error: {}", e)));
624                }
625                _ => {}
626            }
627            buf.clear();
628        }
629
630        Ok(all_results)
631    }
632}
633
634#[cfg(test)]
635mod tests {
636    use super::*;
637    use std::io::Cursor;
638
639    #[test]
640    fn test_xpath_parsing() {
641        let selector = XPathSelector::new("//Release/Title").expect("Valid XPath");
642        assert_eq!(selector.path.len(), 3);
643
644        match &selector.path[0] {
645            PathComponent::DescendantOrSelf => {}
646            _ => panic!("Expected DescendantOrSelf"),
647        }
648
649        match &selector.path[1] {
650            PathComponent::Element(name) => assert_eq!(name, "Release"),
651            _ => panic!("Expected Element(Release)"),
652        }
653    }
654
655    #[test]
656    fn test_simple_element_selection() {
657        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
658        <ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
659            <ern:MessageHeader>
660                <ern:MessageId>MSG001</ern:MessageId>
661            </ern:MessageHeader>
662            <ern:ReleaseList>
663                <ern:Release>
664                    <ern:ReleaseId>REL001</ern:ReleaseId>
665                    <ern:ReferenceTitle>
666                        <ern:TitleText>My Album Title</ern:TitleText>
667                    </ern:ReferenceTitle>
668                </ern:Release>
669            </ern:ReleaseList>
670        </ern:NewReleaseMessage>"#;
671
672        let cursor = Cursor::new(xml.as_bytes());
673        let selector = XPathSelector::new("//TitleText").expect("Valid XPath");
674        let result = selector.select(cursor).expect("Selection should work");
675
676        assert_eq!(result.values.len(), 1);
677        assert_eq!(result.values[0], "My Album Title");
678        assert!(result.stats.elements_processed > 0);
679    }
680
681    #[test]
682    fn test_wildcard_selection() {
683        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
684        <root>
685            <section>
686                <item>Value 1</item>
687                <item>Value 2</item>
688            </section>
689            <section>
690                <item>Value 3</item>
691            </section>
692        </root>"#;
693
694        let cursor = Cursor::new(xml.as_bytes());
695        let selector = XPathSelector::new("//*/item").expect("Valid XPath");
696        let result = selector.select(cursor).expect("Selection should work");
697
698        assert_eq!(result.values.len(), 3);
699        assert!(result.values.contains(&"Value 1".to_string()));
700        assert!(result.values.contains(&"Value 2".to_string()));
701        assert!(result.values.contains(&"Value 3".to_string()));
702    }
703
704    #[test]
705    fn test_descendant_or_self() {
706        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
707        <root>
708            <level1>
709                <level2>
710                    <target>Deep Value</target>
711                </level2>
712            </level1>
713            <target>Shallow Value</target>
714        </root>"#;
715
716        let cursor = Cursor::new(xml.as_bytes());
717        let selector = XPathSelector::new("//target").expect("Valid XPath");
718        let result = selector.select(cursor).expect("Selection should work");
719
720        assert_eq!(result.values.len(), 2);
721        assert!(result.values.contains(&"Deep Value".to_string()));
722        assert!(result.values.contains(&"Shallow Value".to_string()));
723    }
724
725    #[test]
726    fn test_max_results_limit() {
727        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
728        <root>
729            <item>1</item>
730            <item>2</item>
731            <item>3</item>
732            <item>4</item>
733            <item>5</item>
734        </root>"#;
735
736        let cursor = Cursor::new(xml.as_bytes());
737        let selector = XPathSelector::new("//item")
738            .expect("Valid XPath")
739            .max_results(3);
740        let result = selector.select(cursor).expect("Selection should work");
741
742        assert_eq!(result.values.len(), 3);
743        assert_eq!(result.stats.matches_found, 3);
744    }
745
746    #[test]
747    fn test_namespace_awareness() {
748        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
749        <ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
750            <ern:Release>
751                <ern:ReleaseId>REL001</ern:ReleaseId>
752            </ern:Release>
753        </ern:NewReleaseMessage>"#;
754
755        // With namespace awareness (default)
756        let cursor1 = Cursor::new(xml.as_bytes());
757        let selector1 = XPathSelector::new("//ReleaseId")
758            .expect("Valid XPath")
759            .namespace_aware(true);
760        let result1 = selector1.select(cursor1).expect("Selection should work");
761        assert_eq!(result1.values.len(), 1);
762
763        // Without namespace awareness
764        let cursor2 = Cursor::new(xml.as_bytes());
765        let selector2 = XPathSelector::new("//ReleaseId")
766            .expect("Valid XPath")
767            .namespace_aware(false);
768        let result2 = selector2.select(cursor2).expect("Selection should work");
769        assert_eq!(result2.values.len(), 0); // Won't match ern:ReleaseId
770    }
771
772    #[test]
773    fn test_case_sensitivity() {
774        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
775        <root>
776            <ReleaseId>REL001</ReleaseId>
777            <releaseid>REL002</releaseid>
778        </root>"#;
779
780        // Case insensitive (default)
781        let cursor1 = Cursor::new(xml.as_bytes());
782        let selector1 = XPathSelector::new("//releaseid")
783            .expect("Valid XPath")
784            .case_sensitive(false);
785        let result1 = selector1.select(cursor1).expect("Selection should work");
786        assert_eq!(result1.values.len(), 2); // Matches both
787
788        // Case sensitive
789        let cursor2 = Cursor::new(xml.as_bytes());
790        let selector2 = XPathSelector::new("//releaseid")
791            .expect("Valid XPath")
792            .case_sensitive(true);
793        let result2 = selector2.select(cursor2).expect("Selection should work");
794        assert_eq!(result2.values.len(), 1); // Only exact match
795    }
796
797    #[test]
798    fn test_ddex_convenience_methods() {
799        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
800        <ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
801            <ern:ReleaseList>
802                <ern:Release>
803                    <ern:ReferenceTitle>
804                        <ern:TitleText>Album Title</ern:TitleText>
805                    </ern:ReferenceTitle>
806                </ern:Release>
807            </ern:ReleaseList>
808            <ern:ResourceList>
809                <ern:SoundRecording>
810                    <ern:SoundRecordingId Namespace="ISRC">USRC17607839</ern:SoundRecordingId>
811                </ern:SoundRecording>
812            </ern:ResourceList>
813        </ern:NewReleaseMessage>"#;
814
815        // Test release titles
816        let cursor1 = Cursor::new(xml.as_bytes());
817        let titles = XPathSelector::select_release_titles(cursor1).expect("Should find titles");
818        assert_eq!(titles.len(), 1);
819        assert_eq!(titles[0], "Album Title");
820
821        // Test ISRCs
822        let cursor2 = Cursor::new(xml.as_bytes());
823        let isrcs = XPathSelector::select_isrcs(cursor2).expect("Should find ISRCs");
824        assert_eq!(isrcs.len(), 1);
825        assert_eq!(isrcs[0], "USRC17607839");
826    }
827
828    #[test]
829    fn test_empty_elements_with_attributes() {
830        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
831        <root>
832            <element value="test1"/>
833            <element value="test2">content</element>
834            <element/>
835        </root>"#;
836
837        let cursor = Cursor::new(xml.as_bytes());
838        let selector = XPathSelector::new("//element").expect("Valid XPath");
839        let result = selector.select(cursor).expect("Selection should work");
840
841        // Should find 3 elements: 2 with values, 1 with content
842        assert_eq!(result.values.len(), 3);
843
844        // Check that we captured both attribute values and text content
845        assert!(result
846            .values
847            .iter()
848            .any(|v| v == "test1" || v == "test2" || v == "content"));
849    }
850
851    #[test]
852    fn test_performance_stats() {
853        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
854        <root>
855            <item>1</item>
856            <item>2</item>
857            <item>3</item>
858        </root>"#;
859
860        let cursor = Cursor::new(xml.as_bytes());
861        let selector = XPathSelector::new("//item").expect("Valid XPath");
862        let result = selector.select(cursor).expect("Selection should work");
863
864        assert_eq!(result.stats.matches_found, 3);
865        assert!(result.stats.elements_processed >= 4); // root + 3 items
866        assert!(result.stats.bytes_processed > 0);
867        assert!(result.stats.duration.as_nanos() > 0);
868    }
869
870    #[test]
871    fn test_attribute_filtering() {
872        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
873        <root>
874            <item type="audio">Audio Item</item>
875            <item type="video">Video Item</item>
876            <item>No Type</item>
877        </root>"#;
878
879        // Test attribute existence
880        let cursor1 = Cursor::new(xml.as_bytes());
881        let selector1 = XPathSelector::new("//item[@type]").expect("Valid XPath");
882        let result1 = selector1.select(cursor1).expect("Selection should work");
883        assert_eq!(result1.values.len(), 2); // Only items with type attribute
884
885        // Test attribute value matching
886        let cursor2 = Cursor::new(xml.as_bytes());
887        let selector2 = XPathSelector::new("//item[@type='audio']").expect("Valid XPath");
888        let result2 = selector2.select(cursor2).expect("Selection should work");
889        assert_eq!(result2.values.len(), 1);
890        assert_eq!(result2.values[0], "Audio Item");
891    }
892}