Skip to main content

spring_batch_rs/item/xml/
xml_reader.rs

1use crate::core::item::{ItemReader, ItemReaderResult};
2use crate::error::BatchError;
3use log::{debug, error};
4use quick_xml::de::from_str;
5use quick_xml::events::Event;
6use quick_xml::reader::Reader as XmlReader;
7use serde::de::DeserializeOwned;
8use std::any::type_name;
9use std::cell::RefCell;
10use std::fs::File;
11use std::io::{BufReader, Read};
12use std::marker::PhantomData;
13use std::path::Path;
14use std::str;
15
16/// A builder for creating XML item readers.
17///
18/// This builder helps configure XML readers with:
19/// - A tag name to identify items in the XML
20/// - Buffer capacity for performance tuning
21/// - Various input sources (files, in-memory data, etc.)
22///
23/// # Examples
24///
25/// ```
26/// use spring_batch_rs::item::xml::XmlItemReaderBuilder;
27/// use spring_batch_rs::core::item::ItemReader;
28/// use serde::Deserialize;
29/// use std::io::Cursor;
30///
31/// // Define a structure that matches our XML format
32/// #[derive(Debug, Deserialize)]
33/// struct Person {
34///     #[serde(rename = "@id")]
35///     id: i32,
36///     name: String,
37///     age: i32,
38/// }
39///
40/// // Create some XML data
41/// let xml_data = r#"
42/// <people>
43///   <person id="1">
44///     <name>Alice</name>
45///     <age>30</age>
46///   </person>
47///   <person id="2">
48///     <name>Bob</name>
49///     <age>25</age>
50///   </person>
51/// </people>
52/// "#;
53///
54/// // Create a reader from an in-memory buffer
55/// let cursor = Cursor::new(xml_data);
56/// let reader = XmlItemReaderBuilder::<Person>::new()
57///     .tag("person")
58///     .from_reader(cursor);
59///
60/// // Read all persons from the XML
61/// let mut persons = Vec::new();
62/// let mut person_count = 0;
63/// while let Some(person) = reader.read().unwrap() {
64///     persons.push(person);
65///     person_count += 1;
66/// }
67///
68/// assert_eq!(person_count, 2);
69/// assert_eq!(persons[0].id, 1);
70/// assert_eq!(persons[0].name, "Alice");
71/// assert_eq!(persons[1].name, "Bob");
72/// ```
73pub struct XmlItemReaderBuilder<I: DeserializeOwned> {
74    tag_name: Option<String>,
75    capacity: usize,
76    _marker: PhantomData<I>,
77}
78
79impl<I: DeserializeOwned> Default for XmlItemReaderBuilder<I> {
80    fn default() -> Self {
81        Self {
82            tag_name: None,
83            capacity: 1024,
84            _marker: PhantomData,
85        }
86    }
87}
88
89impl<I: DeserializeOwned> XmlItemReaderBuilder<I> {
90    /// Creates a new XML item reader builder.
91    ///
92    /// By default, it will:
93    /// - Look for XML elements matching the type name
94    /// - Use a buffer capacity of 1024 bytes
95    ///
96    /// # Examples
97    ///
98    /// ```
99    /// use spring_batch_rs::item::xml::XmlItemReaderBuilder;
100    /// use serde::Deserialize;
101    ///
102    /// #[derive(Deserialize)]
103    /// struct Person {
104    ///     name: String,
105    ///     age: i32,
106    /// }
107    ///
108    /// let builder = XmlItemReaderBuilder::<Person>::new();
109    /// ```
110    pub fn new() -> Self {
111        Self::default()
112    }
113
114    /// Sets the buffer capacity for the XML reader.
115    ///
116    /// Higher capacity can improve performance for larger XML documents
117    /// but will use more memory.
118    ///
119    /// # Examples
120    ///
121    /// ```
122    /// use spring_batch_rs::item::xml::XmlItemReaderBuilder;
123    /// use serde::Deserialize;
124    ///
125    /// #[derive(Deserialize)]
126    /// struct Person {
127    ///     name: String,
128    ///     age: i32,
129    /// }
130    ///
131    /// // Increase buffer capacity for better performance with large files
132    /// let builder = XmlItemReaderBuilder::<Person>::new()
133    ///     .capacity(4096);
134    /// ```
135    pub fn capacity(mut self, capacity: usize) -> Self {
136        self.capacity = capacity;
137        self
138    }
139
140    /// Sets the XML tag name to search for items.
141    ///
142    /// This method specifies which XML element represents a single item.
143    /// The reader will look for elements with this tag name and deserialize
144    /// them into the target type.
145    ///
146    /// # Examples
147    ///
148    /// ```
149    /// use spring_batch_rs::item::xml::XmlItemReaderBuilder;
150    /// use serde::Deserialize;
151    ///
152    /// #[derive(Deserialize)]
153    /// struct Person {
154    ///     name: String,
155    ///     age: i32,
156    /// }
157    ///
158    /// // Look for <person> elements in the XML
159    /// let builder = XmlItemReaderBuilder::<Person>::new()
160    ///     .tag("person");
161    /// ```
162    pub fn tag<S: AsRef<str>>(mut self, tag_name: S) -> Self {
163        self.tag_name = Some(tag_name.as_ref().to_string());
164        self
165    }
166
167    /// Creates an XML item reader from a reader.
168    ///
169    /// This allows reading from any source that implements the `Read` trait,
170    /// such as files, network streams, or in-memory buffers.
171    ///
172    /// # Examples
173    ///
174    /// ```
175    /// use spring_batch_rs::item::xml::XmlItemReaderBuilder;
176    /// use spring_batch_rs::core::item::ItemReader;
177    /// use serde::Deserialize;
178    /// use std::io::Cursor;
179    ///
180    /// #[derive(Debug, Deserialize)]
181    /// struct Person {
182    ///     name: String,
183    ///     age: i32,
184    /// }
185    ///
186    /// // Create XML data with two persons
187    /// let xml_data = r#"
188    /// <people>
189    ///   <person>
190    ///     <name>Alice</name>
191    ///     <age>30</age>
192    ///   </person>
193    ///   <person>
194    ///     <name>Bob</name>
195    ///     <age>25</age>
196    ///   </person>
197    /// </people>
198    /// "#;
199    ///
200    /// // Create a reader from an in-memory buffer
201    /// let cursor = Cursor::new(xml_data);
202    /// let reader = XmlItemReaderBuilder::<Person>::new()
203    ///     .tag("person")
204    ///     .from_reader(cursor);
205    ///
206    /// // Read and process each person
207    /// let first_person = reader.read().unwrap().unwrap();
208    /// assert_eq!(first_person.name, "Alice");
209    /// assert_eq!(first_person.age, 30);
210    ///
211    /// let second_person = reader.read().unwrap().unwrap();
212    /// assert_eq!(second_person.name, "Bob");
213    /// assert_eq!(second_person.age, 25);
214    ///
215    /// // No more persons
216    /// assert!(reader.read().unwrap().is_none());
217    /// ```
218    pub fn from_reader<R: Read + 'static>(self, reader: R) -> XmlItemReader<R, I> {
219        let tag = match self.tag_name {
220            Some(tag) => tag.into_bytes(),
221            None => {
222                // Default tag name is derived from the type name
223                let type_str = type_name::<I>();
224                let tag_name = type_str.split("::").last().unwrap_or(type_str);
225                tag_name.as_bytes().to_vec()
226            }
227        };
228
229        XmlItemReader::with_tag(reader, self.capacity, tag)
230    }
231
232    /// Creates an XML item reader from a file path.
233    ///
234    /// # Examples
235    ///
236    /// ```no_run
237    /// use spring_batch_rs::item::xml::XmlItemReaderBuilder;
238    /// use spring_batch_rs::core::item::ItemReader;
239    /// use serde::Deserialize;
240    /// use std::path::Path;
241    ///
242    /// #[derive(Debug, Deserialize)]
243    /// struct Person {
244    ///     #[serde(rename = "@id")]
245    ///     id: i32,
246    ///     name: String,
247    ///     age: i32,
248    /// }
249    ///
250    /// // Read from an XML file
251    /// let reader = XmlItemReaderBuilder::<Person>::new()
252    ///     .tag("person")
253    ///     .from_path("data/persons.xml")
254    ///     .unwrap();
255    ///
256    /// // Process each person from the file
257    /// while let Some(person) = reader.read().unwrap() {
258    ///     println!("Read person: {} (id: {})", person.name, person.id);
259    /// }
260    /// ```
261    pub fn from_path<P: AsRef<Path>>(self, path: P) -> Result<XmlItemReader<File, I>, BatchError> {
262        let file_path = path.as_ref();
263        let file = File::open(file_path).map_err(|e| {
264            error!("Failed to open XML file {}: {}", file_path.display(), e);
265            BatchError::ItemReader(format!(
266                "Failed to open XML file {}: {}",
267                file_path.display(),
268                e
269            ))
270        })?;
271
272        Ok(self.from_reader(file))
273    }
274}
275
276/// A simple reader that reads items from an XML file.
277///
278/// This reader parses XML content and deserializes elements with a specific tag
279/// into the desired type. It handles XML attributes, nested elements, and text content.
280///
281/// # Examples
282///
283/// Reading complex nested XML structures:
284///
285/// ```
286/// use spring_batch_rs::item::xml::XmlItemReaderBuilder;
287/// use spring_batch_rs::core::item::ItemReader;
288/// use serde::Deserialize;
289/// use std::io::Cursor;
290///
291/// // Define a nested structure matching our XML format
292/// #[derive(Debug, Deserialize)]
293/// struct Address {
294///     street: String,
295///     city: String,
296///     country: String,
297/// }
298///
299/// #[derive(Debug, Deserialize)]
300/// struct Person {
301///     #[serde(rename = "@id")]
302///     id: i32,
303///     name: String,
304///     age: i32,
305///     address: Address,
306/// }
307///
308/// // Create XML with nested elements
309/// let xml_data = r#"
310/// <directory>
311///   <person id="1">
312///     <name>Alice</name>
313///     <age>30</age>
314///     <address>
315///       <street>123 Main St</street>
316///       <city>Springfield</city>
317///       <country>USA</country>
318///     </address>
319///   </person>
320/// </directory>
321/// "#;
322///
323/// // Create a reader from the XML
324/// let cursor = Cursor::new(xml_data);
325/// let reader = XmlItemReaderBuilder::<Person>::new()
326///     .tag("person")
327///     .from_reader(cursor);
328///
329/// // Read and verify the person with nested address
330/// let person = reader.read().unwrap().unwrap();
331/// assert_eq!(person.id, 1);
332/// assert_eq!(person.name, "Alice");
333/// assert_eq!(person.address.street, "123 Main St");
334/// assert_eq!(person.address.city, "Springfield");
335/// assert_eq!(person.address.country, "USA");
336/// ```
337pub struct XmlItemReader<R, I> {
338    reader: RefCell<XmlReader<BufReader<R>>>,
339    buffer: RefCell<Vec<u8>>,
340    item_tag_name: Vec<u8>,
341    _marker: PhantomData<I>,
342}
343
344impl<R: Read, I: DeserializeOwned> XmlItemReader<R, I> {
345    /// Creates a new XML item reader with a specific tag name.
346    fn with_tag<S: AsRef<[u8]>>(rdr: R, capacity: usize, tag: S) -> Self {
347        let buf_reader = BufReader::with_capacity(capacity, rdr);
348        let mut xml_reader = XmlReader::from_reader(buf_reader);
349        // Don't trim text to preserve spaces around entity references
350        xml_reader.config_mut().trim_text(false);
351
352        Self {
353            reader: RefCell::new(xml_reader),
354            buffer: RefCell::new(Vec::with_capacity(1024)),
355            item_tag_name: tag.as_ref().to_vec(),
356            _marker: PhantomData,
357        }
358    }
359}
360
361impl<R: Read, I: DeserializeOwned> ItemReader<I> for XmlItemReader<R, I> {
362    fn read(&self) -> ItemReaderResult<I> {
363        let mut reader = self.reader.borrow_mut();
364        let mut buffer = self.buffer.borrow_mut();
365
366        let tag_name_str = str::from_utf8(&self.item_tag_name).unwrap_or("<binary>");
367        debug!("Looking for tag: '{}'", tag_name_str);
368
369        // Skip events until we find a start element matching our tag
370        loop {
371            buffer.clear();
372            let event = reader
373                .read_event_into(&mut buffer)
374                .map_err(|e| BatchError::ItemReader(format!("XML parsing error: {}", e)))?;
375
376            match event {
377                Event::Start(ref e) => {
378                    let e_name = e.name();
379                    let name_ref = e_name.as_ref();
380                    let tag_name = str::from_utf8(name_ref).unwrap_or("<binary>");
381
382                    if name_ref == self.item_tag_name.as_slice() {
383                        debug!("Found start tag: '{}'", tag_name);
384
385                        // Extract the full XML for this element
386                        let mut xml_string = String::new();
387                        xml_string.push('<');
388                        if let Ok(name) = str::from_utf8(tag_name.as_ref()) {
389                            xml_string.push_str(name);
390                        }
391                        for attr in e.attributes().flatten() {
392                            xml_string.push(' ');
393                            if let Ok(key) = str::from_utf8(attr.key.as_ref()) {
394                                xml_string.push_str(key);
395                            }
396                            xml_string.push_str("=\"");
397                            if let Ok(value) = str::from_utf8(attr.value.as_ref()) {
398                                xml_string.push_str(value);
399                            }
400                            xml_string.push('"');
401                        }
402                        xml_string.push('>');
403
404                        // Continue reading to get the content
405                        let mut depth = 1;
406                        while depth > 0 {
407                            buffer.clear();
408                            match reader.read_event_into(&mut buffer) {
409                                Ok(Event::Start(ref start)) => {
410                                    depth += 1;
411                                    let s_name = start.name();
412                                    if let Ok(name) = str::from_utf8(s_name.as_ref()) {
413                                        xml_string.push('<');
414                                        xml_string.push_str(name);
415
416                                        // Add attributes
417                                        for attr in start.attributes().flatten() {
418                                            xml_string.push(' ');
419                                            if let Ok(key) = str::from_utf8(attr.key.as_ref()) {
420                                                xml_string.push_str(key);
421                                            }
422                                            xml_string.push_str("=\"");
423                                            if let Ok(value) = str::from_utf8(attr.value.as_ref()) {
424                                                xml_string.push_str(value);
425                                            }
426                                            xml_string.push('"');
427                                        }
428                                        xml_string.push('>');
429                                    }
430                                }
431                                Ok(Event::End(ref end)) => {
432                                    depth -= 1;
433                                    let e_name = end.name();
434                                    if let Ok(name) = str::from_utf8(e_name.as_ref()) {
435                                        xml_string.push_str("</");
436                                        xml_string.push_str(name);
437                                        xml_string.push('>');
438                                    }
439                                }
440                                Ok(Event::Text(ref text)) => {
441                                    // For text nodes, add the raw text content
442                                    let bytes = text.as_ref();
443                                    if let Ok(s) = str::from_utf8(bytes) {
444                                        xml_string.push_str(s);
445                                    }
446                                }
447                                Ok(Event::GeneralRef(ref entity_ref)) => {
448                                    // In quick-xml 0.38+, entity references are reported as separate events
449                                    // Reconstruct the escaped form for proper XML serialization
450                                    let entity_name = entity_ref.as_ref();
451                                    if let Ok(name) = str::from_utf8(entity_name) {
452                                        xml_string.push('&');
453                                        xml_string.push_str(name);
454                                        xml_string.push(';');
455                                    }
456                                }
457                                Ok(Event::CData(ref cdata)) => {
458                                    // For CDATA, wrap in CDATA tags
459                                    let bytes = cdata.as_ref();
460                                    if let Ok(s) = str::from_utf8(bytes) {
461                                        xml_string.push_str("<![CDATA[");
462                                        xml_string.push_str(s);
463                                        xml_string.push_str("]]>");
464                                    }
465                                }
466                                Ok(Event::Eof) => {
467                                    return Err(BatchError::ItemReader(
468                                        "Unexpected end of file".to_string(),
469                                    ));
470                                }
471                                Err(e) => {
472                                    return Err(BatchError::ItemReader(format!(
473                                        "Error reading XML: {}",
474                                        e
475                                    )));
476                                }
477                                _ => { /* Ignore other events */ }
478                            }
479                        }
480
481                        debug!("Finished reading XML item: {}", xml_string);
482
483                        // Now deserialize the complete XML string
484                        match from_str(&xml_string) {
485                            Ok(item) => return Ok(Some(item)),
486                            Err(e) => {
487                                error!(
488                                    "Failed to deserialize XML item: {} from: {}",
489                                    e, xml_string
490                                );
491                                return Err(BatchError::ItemReader(format!(
492                                    "Failed to deserialize XML item: {} from: {}",
493                                    e, xml_string
494                                )));
495                            }
496                        }
497                    }
498                }
499                Event::Eof => {
500                    debug!("Reached end of file");
501                    return Ok(None);
502                }
503                _ => continue, // Skip other events
504            }
505        }
506    }
507}
508
509#[cfg(test)]
510mod tests {
511    use super::*;
512    use serde::{Deserialize, Serialize};
513    use std::io::{Cursor, Write};
514    use tempfile::NamedTempFile;
515
516    // This tells serde to deserialize from the XML tag "TestItem"
517    #[derive(Debug, Deserialize, Serialize, PartialEq, Clone)]
518    #[serde(rename = "TestItem")]
519    struct TestItem {
520        name: String,
521        value: i32,
522    }
523
524    // Complex nested structures for testing
525    #[derive(Debug, Deserialize, Serialize, PartialEq, Clone)]
526    struct EngineSpecs {
527        #[serde(rename = "@type")]
528        engine_type: String,
529        #[serde(rename = "@cylinders")]
530        cylinders: i32,
531        horsepower: i32,
532        #[serde(rename = "fuelEfficiency")]
533        fuel_efficiency: f32,
534    }
535
536    #[derive(Debug, Deserialize, Serialize, PartialEq, Clone)]
537    struct Features {
538        #[serde(rename = "feature", default)]
539        items: Vec<String>,
540    }
541
542    #[derive(Debug, Deserialize, Serialize, PartialEq, Clone)]
543    #[serde(rename = "vehicle")]
544    struct Vehicle {
545        #[serde(rename = "@id")]
546        id: String,
547        #[serde(rename = "@category")]
548        category: String,
549        make: String,
550        model: String,
551        year: i32,
552        engine: EngineSpecs,
553        features: Features,
554    }
555
556    #[test]
557    fn test_xml_reader() {
558        let xml_content = r#"
559            <items>
560                <TestItem>
561                    <name>test1</name>
562                    <value>42</value>
563                </TestItem>
564                <TestItem>
565                    <name>test2</name>
566                    <value>43</value>
567                </TestItem>
568            </items>
569        "#;
570
571        let mut temp_file = NamedTempFile::new().unwrap();
572        temp_file.write_all(xml_content.as_bytes()).unwrap();
573
574        // Use builder to create the reader
575        let reader = XmlItemReaderBuilder::<TestItem>::new()
576            .tag("TestItem")
577            .capacity(1024)
578            .from_path(temp_file.path())
579            .unwrap();
580
581        let item1 = reader.read().unwrap().unwrap();
582        assert_eq!(
583            item1,
584            TestItem {
585                name: "test1".to_string(),
586                value: 42,
587            }
588        );
589
590        let item2 = reader.read().unwrap().unwrap();
591        assert_eq!(
592            item2,
593            TestItem {
594                name: "test2".to_string(),
595                value: 43,
596            }
597        );
598
599        assert!(reader.read().unwrap().is_none());
600    }
601
602    #[test]
603    fn test_xml_reader_with_custom_tag() {
604        let xml_content = r#"
605            <root>
606                <car>
607                    <name>test1</name>
608                    <value>42</value>
609                </car>
610                <car>
611                    <name>test2</name>
612                    <value>43</value>
613                </car>
614            </root>
615        "#;
616
617        let mut temp_file = NamedTempFile::new().unwrap();
618        temp_file.write_all(xml_content.as_bytes()).unwrap();
619
620        let reader = XmlItemReaderBuilder::<TestItem>::new()
621            .tag("car")
622            .capacity(1024)
623            .from_path(temp_file.path())
624            .unwrap();
625
626        let item1 = reader.read().unwrap().unwrap();
627        assert_eq!(
628            item1,
629            TestItem {
630                name: "test1".to_string(),
631                value: 42,
632            }
633        );
634
635        let item2 = reader.read().unwrap().unwrap();
636        assert_eq!(
637            item2,
638            TestItem {
639                name: "test2".to_string(),
640                value: 43,
641            }
642        );
643
644        assert!(reader.read().unwrap().is_none());
645    }
646
647    #[test]
648    fn test_complex_nested_objects() {
649        let xml_content = r#"
650            <root>
651                <vehicle id="v001" category="sedan">
652                    <make>Toyota</make>
653                    <model>Camry</model>
654                    <year>2022</year>
655                    <engine type="hybrid" cylinders="4">
656                        <horsepower>208</horsepower>
657                        <fuelEfficiency>4.5</fuelEfficiency>
658                    </engine>
659                    <features>
660                        <feature>Bluetooth</feature>
661                        <feature>Navigation</feature>
662                        <feature>Leather Seats</feature>
663                    </features>
664                </vehicle>
665                <vehicle id="v002" category="suv">
666                    <make>Honda</make>
667                    <model>CR-V</model>
668                    <year>2023</year>
669                    <engine type="gasoline" cylinders="4">
670                        <horsepower>190</horsepower>
671                        <fuelEfficiency>7.2</fuelEfficiency>
672                    </engine>
673                    <features>
674                        <feature>All-wheel drive</feature>
675                        <feature>Sunroof</feature>
676                    </features>
677                </vehicle>
678            </root>
679        "#;
680
681        let mut temp_file = NamedTempFile::new().unwrap();
682        temp_file.write_all(xml_content.as_bytes()).unwrap();
683
684        let reader = XmlItemReaderBuilder::<Vehicle>::new()
685            .tag("vehicle")
686            .capacity(1024)
687            .from_path(temp_file.path())
688            .unwrap();
689
690        // First item
691        let vehicle1 = reader.read().unwrap().unwrap();
692        assert_eq!(vehicle1.id, "v001");
693        assert_eq!(vehicle1.category, "sedan");
694        assert_eq!(vehicle1.make, "Toyota");
695        assert_eq!(vehicle1.model, "Camry");
696        assert_eq!(vehicle1.year, 2022);
697        assert_eq!(vehicle1.engine.engine_type, "hybrid");
698        assert_eq!(vehicle1.engine.cylinders, 4);
699        assert_eq!(vehicle1.engine.horsepower, 208);
700        assert_eq!(vehicle1.engine.fuel_efficiency, 4.5);
701        assert_eq!(vehicle1.features.items.len(), 3);
702        assert_eq!(vehicle1.features.items[0], "Bluetooth");
703        assert_eq!(vehicle1.features.items[1], "Navigation");
704        assert_eq!(vehicle1.features.items[2], "Leather Seats");
705
706        // Second item
707        let vehicle2 = reader.read().unwrap().unwrap();
708        assert_eq!(vehicle2.id, "v002");
709        assert_eq!(vehicle2.category, "suv");
710        assert_eq!(vehicle2.make, "Honda");
711        assert_eq!(vehicle2.model, "CR-V");
712        assert_eq!(vehicle2.year, 2023);
713        assert_eq!(vehicle2.engine.engine_type, "gasoline");
714        assert_eq!(vehicle2.engine.cylinders, 4);
715        assert_eq!(vehicle2.engine.horsepower, 190);
716        assert_eq!(vehicle2.engine.fuel_efficiency, 7.2);
717        assert_eq!(vehicle2.features.items.len(), 2);
718        assert_eq!(vehicle2.features.items[0], "All-wheel drive");
719        assert_eq!(vehicle2.features.items[1], "Sunroof");
720
721        // No more items
722        assert!(reader.read().unwrap().is_none());
723    }
724
725    #[test]
726    fn test_xml_reader_builder() {
727        let xml_content = r#"
728            <data>
729                <vehicle id="v001" category="sedan">
730                    <make>Toyota</make>
731                    <model>Camry</model>
732                    <year>2022</year>
733                    <engine type="hybrid" cylinders="4">
734                        <horsepower>208</horsepower>
735                        <fuelEfficiency>4.5</fuelEfficiency>
736                    </engine>
737                    <features>
738                        <feature>Bluetooth</feature>
739                        <feature>Navigation</feature>
740                    </features>
741                </vehicle>
742            </data>
743        "#;
744
745        let mut temp_file = NamedTempFile::new().unwrap();
746        temp_file.write_all(xml_content.as_bytes()).unwrap();
747
748        // Use builder to create the reader with custom tag and capacity
749        let reader = XmlItemReaderBuilder::<Vehicle>::new()
750            .tag("vehicle")
751            .capacity(2048)
752            .from_path(temp_file.path())
753            .unwrap();
754
755        // Verify the reader works correctly
756        let vehicle = reader.read().unwrap().unwrap();
757        assert_eq!(vehicle.id, "v001");
758        assert_eq!(vehicle.make, "Toyota");
759        assert_eq!(vehicle.model, "Camry");
760        assert_eq!(vehicle.year, 2022);
761
762        // No more items
763        assert!(reader.read().unwrap().is_none());
764    }
765
766    #[test]
767    fn test_empty_xml_file() {
768        // Empty XML file
769        let xml_content = "<root></root>";
770
771        let mut temp_file = NamedTempFile::new().unwrap();
772        temp_file.write_all(xml_content.as_bytes()).unwrap();
773
774        let reader = XmlItemReaderBuilder::<TestItem>::new()
775            .tag("TestItem")
776            .from_path(temp_file.path())
777            .unwrap();
778
779        // Should return None immediately - no items to read
780        assert!(reader.read().unwrap().is_none());
781    }
782
783    #[test]
784    fn test_xml_with_empty_tags() {
785        // XML with empty tags that match our target
786        let xml_content = r#"
787            <root>
788                <TestItem>
789                    <name></name>
790                    <value>0</value>
791                </TestItem>
792                <TestItem>
793                    <name></name>
794                    <value>0</value>
795                </TestItem>
796            </root>
797        "#;
798
799        let mut temp_file = NamedTempFile::new().unwrap();
800        temp_file.write_all(xml_content.as_bytes()).unwrap();
801
802        let reader = XmlItemReaderBuilder::<TestItem>::new()
803            .tag("TestItem")
804            .from_path(temp_file.path())
805            .unwrap();
806
807        // Both items should be read as default values
808        let item1 = reader.read().unwrap().unwrap();
809        assert_eq!(item1.name, "");
810        assert_eq!(item1.value, 0);
811
812        let item2 = reader.read().unwrap().unwrap();
813        assert_eq!(item2.name, "");
814        assert_eq!(item2.value, 0);
815
816        assert!(reader.read().unwrap().is_none());
817    }
818
819    #[test]
820    fn test_xml_with_attributes() {
821        // Define a type that captures XML attributes
822        #[derive(Debug, Deserialize, Serialize, PartialEq)]
823        struct ItemWithAttrs {
824            #[serde(rename = "@id")]
825            id: String,
826            #[serde(rename = "@type")]
827            item_type: String,
828            content: String,
829        }
830
831        let xml_content = r#"
832            <root>
833                <item id="1" type="normal">
834                    <content>First item</content>
835                </item>
836                <item id="2" type="special">
837                    <content>Second item</content>
838                </item>
839            </root>
840        "#;
841
842        let mut temp_file = NamedTempFile::new().unwrap();
843        temp_file.write_all(xml_content.as_bytes()).unwrap();
844
845        let reader = XmlItemReaderBuilder::<ItemWithAttrs>::new()
846            .tag("item")
847            .from_path(temp_file.path())
848            .unwrap();
849
850        let item1 = reader.read().unwrap().unwrap();
851        assert_eq!(item1.id, "1");
852        assert_eq!(item1.item_type, "normal");
853        assert_eq!(item1.content, "First item");
854
855        let item2 = reader.read().unwrap().unwrap();
856        assert_eq!(item2.id, "2");
857        assert_eq!(item2.item_type, "special");
858        assert_eq!(item2.content, "Second item");
859
860        assert!(reader.read().unwrap().is_none());
861    }
862
863    #[test]
864    fn test_xml_with_cdata() {
865        // Test with CDATA sections which may contain special characters
866        let xml_content = r#"
867            <root>
868                <TestItem>
869                    <name><![CDATA[name with <special> & chars]]></name>
870                    <value>42</value>
871                </TestItem>
872                <TestItem>
873                    <name>regular name</name>
874                    <value><![CDATA[55]]></value>
875                </TestItem>
876            </root>
877        "#;
878
879        let mut temp_file = NamedTempFile::new().unwrap();
880        temp_file.write_all(xml_content.as_bytes()).unwrap();
881
882        let reader = XmlItemReaderBuilder::<TestItem>::new()
883            .tag("TestItem")
884            .from_path(temp_file.path())
885            .unwrap();
886
887        let item1 = reader.read().unwrap().unwrap();
888        assert_eq!(item1.name, "name with <special> & chars");
889        assert_eq!(item1.value, 42);
890
891        let item2 = reader.read().unwrap().unwrap();
892        assert_eq!(item2.name, "regular name");
893        assert_eq!(item2.value, 55);
894
895        assert!(reader.read().unwrap().is_none());
896    }
897
898    #[test]
899    fn test_malformed_xml() {
900        // Malformed XML with unclosed tags
901        let xml_content = r#"
902            <root>
903                <TestItem>
904                    <name>test1</name>
905                    <value>42
906                </TestItem>
907            </root>
908        "#;
909
910        let mut temp_file = NamedTempFile::new().unwrap();
911        temp_file.write_all(xml_content.as_bytes()).unwrap();
912
913        let reader = XmlItemReaderBuilder::<TestItem>::new()
914            .tag("TestItem")
915            .from_path(temp_file.path())
916            .unwrap();
917
918        // Should return an error when trying to read
919        let result = reader.read();
920        assert!(result.is_err());
921    }
922
923    #[test]
924    fn test_xml_type_mismatch() {
925        // XML with a value that doesn't match the expected type
926        let xml_content = r#"
927            <root>
928                <TestItem>
929                    <name>test1</name>
930                    <value>not_a_number</value>
931                </TestItem>
932            </root>
933        "#;
934
935        let mut temp_file = NamedTempFile::new().unwrap();
936        temp_file.write_all(xml_content.as_bytes()).unwrap();
937
938        let reader = XmlItemReaderBuilder::<TestItem>::new()
939            .tag("TestItem")
940            .from_path(temp_file.path())
941            .unwrap();
942
943        // Should return an error when trying to deserialize
944        let result = reader.read();
945        assert!(result.is_err()); // Should return an error for type mismatch
946    }
947
948    #[test]
949    fn test_default_tag_inference() {
950        // When tag is not specified, it should use the type name
951        let xml_content = r#"
952            <root>
953                <TestItem>
954                    <name>test1</name>
955                    <value>42</value>
956                </TestItem>
957            </root>
958        "#;
959
960        let mut temp_file = NamedTempFile::new().unwrap();
961        temp_file.write_all(xml_content.as_bytes()).unwrap();
962
963        // Notice we don't specify the tag
964        let reader = XmlItemReaderBuilder::<TestItem>::new()
965            .from_path(temp_file.path())
966            .unwrap();
967
968        // Should infer the tag name from the type
969        let item = reader.read().unwrap().unwrap();
970        assert_eq!(item.name, "test1");
971        assert_eq!(item.value, 42);
972
973        assert!(reader.read().unwrap().is_none());
974    }
975
976    #[test]
977    fn test_read_from_memory() {
978        // Test reading directly from a memory buffer
979        let xml_content = r#"
980            <root>
981                <TestItem>
982                    <name>memory test</name>
983                    <value>100</value>
984                </TestItem>
985            </root>
986        "#;
987
988        // Create an in-memory reader
989        let reader = XmlItemReaderBuilder::<TestItem>::new()
990            .tag("TestItem")
991            .from_reader(xml_content.as_bytes());
992
993        // Should read correctly from memory
994        let item = reader.read().unwrap().unwrap();
995        assert_eq!(item.name, "memory test");
996        assert_eq!(item.value, 100);
997
998        assert!(reader.read().unwrap().is_none());
999    }
1000
1001    #[test]
1002    fn test_xml_reader_with_invalid_xml() {
1003        let invalid_xml = r#"
1004        <items>
1005            <item>
1006                <name>Invalid Item</name>
1007                <value>123
1008            </item>
1009        </items>
1010        "#;
1011
1012        let cursor = Cursor::new(invalid_xml);
1013        let reader = XmlItemReaderBuilder::<TestItem>::new()
1014            .tag("item")
1015            .from_reader(cursor);
1016
1017        // Should handle malformed XML gracefully
1018        let result = reader.read();
1019        assert!(result.is_err());
1020    }
1021
1022    #[test]
1023    fn test_xml_reader_with_empty_file() {
1024        let empty_xml = "";
1025        let cursor = Cursor::new(empty_xml);
1026        let reader = XmlItemReaderBuilder::<TestItem>::new()
1027            .tag("item")
1028            .from_reader(cursor);
1029
1030        let result = reader.read();
1031        assert!(result.is_ok());
1032        assert!(result.unwrap().is_none());
1033    }
1034
1035    #[test]
1036    fn test_xml_reader_with_no_matching_tags() {
1037        let xml_data = r#"
1038        <root>
1039            <other>
1040                <name>Not an item</name>
1041                <value>123</value>
1042            </other>
1043        </root>
1044        "#;
1045
1046        let cursor = Cursor::new(xml_data);
1047        let reader = XmlItemReaderBuilder::<TestItem>::new()
1048            .tag("item")
1049            .from_reader(cursor);
1050
1051        let result = reader.read();
1052        assert!(result.is_ok());
1053        assert!(result.unwrap().is_none());
1054    }
1055
1056    #[test]
1057    fn test_xml_reader_builder_with_custom_capacity() {
1058        let xml_data = r#"
1059        <items>
1060            <item>
1061                <name>Test Item</name>
1062                <value>123</value>
1063            </item>
1064        </items>
1065        "#;
1066
1067        let cursor = Cursor::new(xml_data);
1068        let reader = XmlItemReaderBuilder::<TestItem>::new()
1069            .tag("item")
1070            .capacity(2048)
1071            .from_reader(cursor);
1072
1073        let result = reader.read();
1074        assert!(result.is_ok());
1075        assert!(result.unwrap().is_some());
1076    }
1077
1078    #[test]
1079    fn test_xml_reader_with_nested_elements() {
1080        #[derive(Debug, Deserialize, PartialEq)]
1081        struct NestedItem {
1082            name: String,
1083            value: i32,
1084        }
1085
1086        let xml_data = r#"
1087        <items>
1088            <nested>
1089                <name>Nested Item</name>
1090                <value>456</value>
1091            </nested>
1092        </items>
1093        "#;
1094
1095        let cursor = Cursor::new(xml_data);
1096        let reader = XmlItemReaderBuilder::<NestedItem>::new()
1097            .tag("nested")
1098            .from_reader(cursor);
1099
1100        let result = reader.read();
1101        assert!(result.is_ok());
1102        let item = result.unwrap().unwrap();
1103        assert_eq!(item.name, "Nested Item");
1104        assert_eq!(item.value, 456);
1105    }
1106
1107    #[test]
1108    fn test_xml_reader_with_multiple_reads() {
1109        let xml_data = r#"
1110        <items>
1111            <item>
1112                <name>First Item</name>
1113                <value>100</value>
1114            </item>
1115            <item>
1116                <name>Second Item</name>
1117                <value>200</value>
1118            </item>
1119            <item>
1120                <name>Third Item</name>
1121                <value>300</value>
1122            </item>
1123        </items>
1124        "#;
1125
1126        let cursor = Cursor::new(xml_data);
1127        let reader = XmlItemReaderBuilder::<TestItem>::new()
1128            .tag("item")
1129            .from_reader(cursor);
1130
1131        // Read all items
1132        let mut items = Vec::new();
1133        while let Some(item) = reader.read().unwrap() {
1134            items.push(item);
1135        }
1136
1137        assert_eq!(items.len(), 3);
1138        assert_eq!(items[0].name, "First Item");
1139        assert_eq!(items[1].name, "Second Item");
1140        assert_eq!(items[2].name, "Third Item");
1141    }
1142
1143    #[test]
1144    fn test_xml_reader_with_whitespace_handling() {
1145        let xml_data = r#"
1146        <items>
1147            <item>
1148                <name>Whitespace Item</name>
1149                <value>789</value>
1150            </item>
1151        </items>
1152        "#;
1153
1154        let cursor = Cursor::new(xml_data);
1155        let reader = XmlItemReaderBuilder::<TestItem>::new()
1156            .tag("item")
1157            .from_reader(cursor);
1158
1159        let result = reader.read();
1160        assert!(result.is_ok());
1161        let item = result.unwrap().unwrap();
1162        assert_eq!(item.name, "Whitespace Item");
1163        assert_eq!(item.value, 789);
1164    }
1165
1166    #[test]
1167    fn test_xml_reader_from_path_error_handling() {
1168        let result = XmlItemReaderBuilder::<TestItem>::new()
1169            .tag("item")
1170            .from_path("/nonexistent/path/file.xml");
1171
1172        assert!(result.is_err());
1173    }
1174
1175    #[test]
1176    fn test_xml_reader_with_special_characters() {
1177        let xml_data = r#"
1178        <items>
1179            <item>
1180                <name>Special &amp; Characters &lt;&gt;</name>
1181                <value>999</value>
1182            </item>
1183        </items>
1184        "#;
1185
1186        let cursor = Cursor::new(xml_data);
1187        let reader = XmlItemReaderBuilder::<TestItem>::new()
1188            .tag("item")
1189            .from_reader(cursor);
1190
1191        let result = reader.read();
1192        assert!(result.is_ok());
1193        let item = result.unwrap().unwrap();
1194        assert_eq!(item.name, "Special & Characters <>");
1195        assert_eq!(item.value, 999);
1196    }
1197
1198    #[test]
1199    fn test_xml_reader_builder_default() {
1200        let builder1 = XmlItemReaderBuilder::<TestItem>::new();
1201        let builder2 = XmlItemReaderBuilder::<TestItem>::default();
1202
1203        // Both should have the same default values
1204        assert_eq!(builder1.capacity, builder2.capacity);
1205        assert_eq!(builder1.tag_name, builder2.tag_name);
1206    }
1207
1208    #[test]
1209    fn should_derive_tag_from_type_name_when_not_set_in_from_reader() {
1210        // This test calls from_reader() without .tag() → exercises the None branch (line 221)
1211        let xml_content =
1212            r#"<root><TestItem><name>derived</name><value>7</value></TestItem></root>"#;
1213        let cursor = Cursor::new(xml_content);
1214
1215        // No .tag() call → tag derived from type name "TestItem"
1216        let reader = XmlItemReaderBuilder::<TestItem>::new().from_reader(cursor);
1217
1218        let item = reader.read().unwrap().unwrap();
1219        assert_eq!(item.name, "derived");
1220        assert_eq!(item.value, 7);
1221    }
1222
1223    #[test]
1224    fn should_return_error_on_unexpected_eof_inside_item() {
1225        // Truncated XML — the item tag is opened but never closed → EOF inside inner loop
1226        let xml_content = r#"<root><TestItem><name>truncated"#;
1227        let cursor = Cursor::new(xml_content);
1228
1229        let reader = XmlItemReaderBuilder::<TestItem>::new()
1230            .tag("TestItem")
1231            .from_reader(cursor);
1232
1233        let result = reader.read();
1234        assert!(result.is_err(), "expected error for truncated XML");
1235        match result {
1236            Err(BatchError::ItemReader(msg)) => {
1237                assert!(
1238                    msg.contains("Unexpected end of file") || msg.contains("XML"),
1239                    "unexpected error message: {msg}"
1240                );
1241            }
1242            other => panic!("expected ItemReader error, got {other:?}"),
1243        }
1244    }
1245
1246    #[test]
1247    fn should_ignore_xml_comments_inside_items() {
1248        // A Comment event inside an item body exercises the `_ => {}` branch (line 477)
1249        let xml_content = r#"<root><TestItem><!-- a comment --><name>commented</name><value>5</value></TestItem></root>"#;
1250        let cursor = Cursor::new(xml_content);
1251
1252        let reader = XmlItemReaderBuilder::<TestItem>::new()
1253            .tag("TestItem")
1254            .from_reader(cursor);
1255
1256        let item = reader.read().unwrap().unwrap();
1257        assert_eq!(item.name, "commented");
1258        assert_eq!(item.value, 5);
1259    }
1260}