// spring_batch_rs/item/csv/csv_reader.rs

1use csv::{ReaderBuilder, StringRecordsIntoIter, Terminator, Trim};
2use serde::de::DeserializeOwned;
3use std::{cell::RefCell, fs::File, io::Read, marker::PhantomData, path::Path};
4
5use crate::{
6    core::item::{ItemReader, ItemReaderResult},
7    error::BatchError,
8};
9
10/// A CSV item reader that implements the `ItemReader` trait.
11///
12/// This reader deserializes CSV data into Rust structs row by row
13/// using Serde's deserialization capabilities. It can process CSV
14/// data from files, strings, or any source implementing the `Read` trait.
15///
16/// # Type Parameters
17///
18/// - `R`: The type of reader providing the CSV data. Must implement `Read`.
19///
20/// # Implementation Details
21///
22/// - Uses a `RefCell` to provide interior mutability for the CSV record iterator
23/// - Requires `DeserializeOwned` for types that can be deserialized from CSV rows
24/// - Automatically converts CSV parsing errors into Spring Batch errors
25/// - Allows streaming data processing without loading the entire file into memory
26///
27/// # Examples
28///
29/// ```
30/// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
31/// use spring_batch_rs::core::item::ItemReader;
32/// use serde::Deserialize;
33///
34/// #[derive(Debug, Deserialize)]
35/// struct Record {
36///     name: String,
37///     value: i32,
38/// }
39///
40/// // Create a CSV string
41/// let data = "\
42/// name,value
43/// foo,123
44/// bar,456
45/// ";
46///
47/// // Build a reader
48/// let reader = CsvItemReaderBuilder::<Record>::new()
49///     .has_headers(true)
50///     .from_reader(data.as_bytes());
51///
52/// // Read the first record
53/// let record: Record = reader.read().unwrap().unwrap();
54/// assert_eq!(record.name, "foo");
55/// assert_eq!(record.value, 123);
56///
57/// // Read the second record
58/// let record: Record = reader.read().unwrap().unwrap();
59/// assert_eq!(record.name, "bar");
60/// assert_eq!(record.value, 456);
61///
62/// // No more records - explicitly use Record type again
63/// assert!(ItemReader::<Record>::read(&reader).unwrap().is_none());
64/// ```
65pub struct CsvItemReader<R: Read> {
66    /// Iterator over the CSV records
67    ///
68    /// Uses `RefCell` to provide interior mutability so we can iterate
69    /// through records while keeping the `read` method signature compatible
70    /// with the `ItemReader` trait.
71    records: RefCell<StringRecordsIntoIter<R>>,
72}
73
74impl<I: DeserializeOwned, R: Read> ItemReader<I> for CsvItemReader<R> {
75    /// Reads the next item from the CSV file.
76    ///
77    /// This method reads and deserializes the next row from the CSV source.
78    /// The row is converted to the specified type `T` using Serde's deserialization.
79    ///
80    /// # Deserialization Process
81    ///
82    /// 1. Gets the next record from the CSV iterator
83    /// 2. If no more records, returns `Ok(None)`
84    /// 3. Deserializes the record to type `T` using serde
85    /// 4. Wraps errors in the Spring Batch error system
86    ///
87    /// # Returns
88    /// - `Ok(Some(record))` if a record is successfully read
89    /// - `Ok(None)` if there are no more records to read
90    /// - `Err(BatchError::ItemReader(error))` if an error occurs during reading or deserialization
91    ///
92    /// # Examples
93    ///
94    /// ```
95    /// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
96    /// use spring_batch_rs::core::item::ItemReader;
97    /// use serde::Deserialize;
98    ///
99    /// #[derive(Debug, Deserialize)]
100    /// struct Person {
101    ///     name: String,
102    ///     age: u8,
103    /// }
104    ///
105    /// let data = "name,age\nAlice,30\nBob,25";
106    /// let reader = CsvItemReaderBuilder::<Person>::new()
107    ///     .has_headers(true)
108    ///     .from_reader(data.as_bytes());
109    ///
110    /// // Read all people
111    /// let mut people: Vec<Person> = Vec::new();
112    /// while let Some(person) = reader.read().unwrap() {
113    ///     people.push(person);
114    /// }
115    ///
116    /// assert_eq!(people.len(), 2);
117    /// assert_eq!(people[0].name, "Alice");
118    /// assert_eq!(people[0].age, 30);
119    /// ```
120    fn read(&self) -> ItemReaderResult<I> {
121        // Try to get the next CSV record from the iterator
122        if let Some(result) = self.records.borrow_mut().next() {
123            match result {
124                Ok(string_record) => {
125                    // Attempt to deserialize the record to type T
126                    let result: Result<I, _> = string_record.deserialize(None);
127
128                    match result {
129                        Ok(record) => Ok(Some(record)),
130                        Err(error) => Err(BatchError::ItemReader(error.to_string())),
131                    }
132                }
133                Err(error) => Err(BatchError::ItemReader(error.to_string())),
134            }
135        } else {
136            // No more records in the CSV file
137            Ok(None)
138        }
139    }
140}
141
142/// A builder for configuring CSV item reading.
143///
144/// This builder allows you to customize the CSV reading behavior,
145/// including delimiter, terminator, and header handling.
146///
147/// # Design Pattern
148///
149/// This struct implements the Builder pattern, which allows for fluent, chainable
150/// configuration of a `CsvItemReader` before creation. Each method returns `self`
151/// to allow method chaining.
152///
153/// # Default Configuration
154///
155/// - Delimiter: comma (,)
156/// - Terminator: CRLF (Windows-style line endings)
157/// - Headers: disabled
158/// - Trimming: All fields trimmed
159///
160/// # Examples
161///
162/// ```
163/// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
164/// use spring_batch_rs::core::item::ItemReader;
165/// use serde::Deserialize;
166/// use csv::Terminator;
167///
168/// #[derive(Deserialize)]
169/// struct Person {
170///     name: String,
171///     age: u8,
172/// }
173///
174/// // Custom CSV configuration
175/// let reader = CsvItemReaderBuilder::<Person>::new()
176///     .delimiter(b';')  // Use semicolon as delimiter
177///     .terminator(Terminator::Any(b'\n'))  // Unix line endings
178///     .has_headers(true)  // First row contains headers
179///     .from_reader("name;age\nAlice;30".as_bytes());
180/// ```
181#[derive(Default)]
182pub struct CsvItemReaderBuilder<I> {
183    /// The delimiter character (default: comma ',')
184    delimiter: u8,
185    /// The line terminator (default: CRLF)
186    terminator: Terminator,
187    /// Whether the CSV has headers (default: false)
188    has_headers: bool,
189    _pd: PhantomData<I>,
190}
191
192impl<I> CsvItemReaderBuilder<I> {
193    /// Creates a new `CsvItemReaderBuilder` with default configuration.
194    ///
195    /// Default settings:
196    /// - Delimiter: comma (,)
197    /// - Terminator: CRLF (Windows-style line endings)
198    /// - Headers: disabled
199    ///
200    /// # Examples
201    ///
202    /// ```
203    /// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
204    /// use serde::Deserialize;
205    ///
206    /// #[derive(Deserialize)]
207    /// struct Record {
208    ///     field: String,
209    /// }
210    ///
211    /// let builder = CsvItemReaderBuilder::<Record>::new();
212    /// ```
213    pub fn new() -> Self {
214        Self {
215            delimiter: b',',
216            terminator: Terminator::CRLF,
217            has_headers: false,
218            _pd: PhantomData,
219        }
220    }
221
222    /// Sets the delimiter character for the CSV parsing.
223    ///
224    /// # Parameters
225    /// - `delimiter`: The character to use as a field delimiter
226    ///
227    /// # Examples
228    ///
229    /// ```
230    /// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
231    /// use serde::Deserialize;
232    ///
233    /// #[derive(Deserialize)]
234    /// struct Record {
235    ///     field: String,
236    /// }
237    ///
238    /// // Use tab as delimiter
239    /// let builder = CsvItemReaderBuilder::<Record>::new()
240    ///     .delimiter(b'\t');
241    ///
242    /// // Use semicolon as delimiter
243    /// let builder = CsvItemReaderBuilder::<Record>::new()
244    ///     .delimiter(b';');
245    /// ```
246    pub fn delimiter(mut self, delimiter: u8) -> Self {
247        self.delimiter = delimiter;
248        self
249    }
250
251    /// Sets the line terminator for the CSV parsing.
252    ///
253    /// # Parameters
254    /// - `terminator`: The line terminator to use
255    ///
256    /// # Terminator Options
257    ///
258    /// - `Terminator::CRLF`: Windows-style line endings (default)
259    /// - `Terminator::Any(byte)`: Custom terminator, often `b'\n'` for Unix-style
260    ///
261    /// # Examples
262    ///
263    /// ```
264    /// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
265    /// use csv::Terminator;
266    /// use serde::Deserialize;
267    ///
268    /// #[derive(Deserialize)]
269    /// struct Record {
270    ///     field: String,
271    /// }
272    ///
273    /// // Use Unix-style line endings (LF)
274    /// let builder = CsvItemReaderBuilder::<Record>::new()
275    ///     .terminator(Terminator::Any(b'\n'));
276    /// ```
277    pub fn terminator(mut self, terminator: Terminator) -> Self {
278        self.terminator = terminator;
279        self
280    }
281
282    /// Sets whether the CSV file has headers.
283    ///
284    /// When enabled, the first row is treated as headers and is not returned
285    /// as part of the data. The header names can be used to match fields in
286    /// the deserialization process.
287    ///
288    /// # Parameters
289    /// - `yes`: Whether headers are present
290    ///
291    /// # Deserialization Impact
292    ///
293    /// When enabled, column names from headers can be matched to struct field names
294    /// during deserialization. This is often more robust than relying on column order.
295    ///
296    /// # Examples
297    ///
298    /// ```
299    /// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
300    /// use serde::Deserialize;
301    ///
302    /// #[derive(Deserialize)]
303    /// struct Record {
304    ///     field: String,
305    /// }
306    ///
307    /// // Enable headers (first row is column names)
308    /// let builder = CsvItemReaderBuilder::<Record>::new()
309    ///     .has_headers(true);
310    ///
311    /// // Disable headers (all rows are data)
312    /// let builder = CsvItemReaderBuilder::<Record>::new()
313    ///     .has_headers(false);
314    /// ```
315    pub fn has_headers(mut self, yes: bool) -> Self {
316        self.has_headers = yes;
317        self
318    }
319
320    /// Creates a `CsvItemReader` from a reader.
321    ///
322    /// This allows reading CSV data from any source that implements the `Read` trait,
323    /// such as files, strings, or network connections.
324    ///
325    /// # Parameters
326    /// - `rdr`: The reader containing CSV data
327    ///
328    /// # Configuration Applied
329    ///
330    /// The following configurations are applied:
331    /// - Trims all whitespace from fields
332    /// - Uses specified delimiter (default: comma)
333    /// - Uses specified terminator (default: CRLF)
334    /// - Handles headers according to configuration
335    /// - Strict parsing (not flexible) to identify formatting issues
336    ///
337    /// # Examples
338    ///
339    /// ```
340    /// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
341    /// use spring_batch_rs::core::item::ItemReader;
342    /// use serde::Deserialize;
343    /// use std::io::Cursor;
344    ///
345    /// #[derive(Deserialize)]
346    /// struct Record {
347    ///     id: u32,
348    ///     name: String,
349    /// }
350    ///
351    /// // Read from a string
352    /// let data = "id,name\n1,Alice\n2,Bob";
353    /// let reader = CsvItemReaderBuilder::<Record>::new()
354    ///     .has_headers(true)
355    ///     .from_reader(data.as_bytes());
356    ///
357    /// // Or read from a Cursor
358    /// let cursor = Cursor::new("id,name\n1,Alice\n2,Bob");
359    /// let reader = CsvItemReaderBuilder::<Record>::new()
360    ///     .has_headers(true)
361    ///     .from_reader(cursor);
362    /// ```
363    pub fn from_reader<R: Read>(self, rdr: R) -> CsvItemReader<R> {
364        // Configure the CSV reader with builder options
365        let rdr = ReaderBuilder::new()
366            .trim(Trim::All) // Trim whitespace from all fields
367            .delimiter(self.delimiter)
368            .terminator(self.terminator)
369            .has_headers(self.has_headers)
370            .flexible(false) // Use strict parsing to catch formatting errors
371            .from_reader(rdr);
372
373        // Convert to a record iterator
374        let records = rdr.into_records();
375
376        CsvItemReader {
377            records: RefCell::new(records),
378        }
379    }
380
381    /// Creates a `CsvItemReader` from a file path.
382    ///
383    /// # Parameters
384    /// - `path`: The path to the CSV file
385    ///
386    /// # Returns
387    /// A new `CsvItemReader` configured to read from the specified file
388    ///
389    /// # Panics
390    /// Panics if the file cannot be opened
391    ///
392    /// # Error Handling
393    ///
394    /// This method panics immediately if the file cannot be opened, which is appropriate
395    /// for initialization failures. Subsequent reading errors are returned as `Result` values
396    /// from the `read` method.
397    ///
398    /// # Examples
399    ///
400    /// ```no_run
401    /// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
402    /// use spring_batch_rs::core::item::ItemReader;
403    /// use serde::Deserialize;
404    ///
405    /// #[derive(Deserialize)]
406    /// struct Record {
407    ///     id: u32,
408    ///     name: String,
409    /// }
410    ///
411    /// // Read from a file
412    /// let reader = CsvItemReaderBuilder::<Record>::new()
413    ///     .has_headers(true)
414    ///     .from_path("data.csv");
415    ///
416    /// // Process records
417    /// let mut records: Vec<Record> = Vec::new();
418    /// while let Some(record) = ItemReader::<Record>::read(&reader).unwrap() {
419    ///     println!("ID: {}, Name: {}", record.id, record.name);
420    ///     records.push(record);
421    /// }
422    /// ```
423    pub fn from_path<R: AsRef<Path>>(self, path: R) -> CsvItemReader<File> {
424        // Configure the CSV reader with builder options
425        let rdr = ReaderBuilder::new()
426            .trim(Trim::All) // Trim whitespace from all fields
427            .delimiter(self.delimiter)
428            .terminator(self.terminator)
429            .has_headers(self.has_headers)
430            .flexible(false) // Use strict parsing to catch formatting errors
431            .from_path(path);
432
433        // Unwrap here is appropriate since file opening is an initialization step
434        // If it fails, we want to fail fast rather than returning an error
435        let records = rdr.unwrap().into_records();
436
437        CsvItemReader {
438            records: RefCell::new(records),
439        }
440    }
441}
442
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::item::ItemReader;
    use csv::StringRecord;
    use serde::Deserialize;
    use std::error::Error;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Result type shared by the fallible tests in this module.
    type TestResult = Result<(), Box<dyn Error>>;

    /// Item type used by most tests: one row of the city fixtures.
    #[derive(Debug, Deserialize, PartialEq)]
    struct City {
        city: String,
        country: String,
        pop: u32,
    }

    /// Header row plus two indented data rows; the indentation exercises trimming.
    const US_CITIES_CSV: &str = "city,country,pop
        Boston,United States,4628910
        Concord,United States,42695";

    /// Builds an owned `City` from borrowed parts.
    fn make_city(name: &str, country: &str, pop: u32) -> City {
        City {
            city: name.to_string(),
            country: country.to_string(),
            pop,
        }
    }

    /// The raw record iterator skips the header row and yields trimmed fields.
    #[test]
    fn should_parse_string_records_with_headers() -> TestResult {
        let reader = CsvItemReaderBuilder::<City>::new()
            .has_headers(true)
            .delimiter(b',')
            .from_reader(US_CITIES_CSV.as_bytes());

        // Drain the underlying iterator directly, bypassing deserialization.
        let mut rows: Vec<StringRecord> = Vec::new();
        for row in reader.records.into_inner() {
            rows.push(row?);
        }

        let expected = vec![
            vec!["Boston", "United States", "4628910"],
            vec!["Concord", "United States", "42695"],
        ];
        assert_eq!(rows, expected);

        Ok(())
    }

    /// Rows are deserialized into typed items through the `ItemReader` trait.
    #[test]
    fn test_deserialize_typed_records() -> TestResult {
        let reader = CsvItemReaderBuilder::<City>::new()
            .has_headers(true)
            .from_reader(US_CITIES_CSV.as_bytes());

        let expected = [
            make_city("Boston", "United States", 4628910),
            make_city("Concord", "United States", 42695),
        ];

        // Each expected city must come out of the reader in order.
        for want in &expected {
            let got: City = reader.read()?.unwrap();
            assert_eq!(&got, want);
        }

        // After the last row the reader reports exhaustion.
        let leftover: Option<City> = reader.read()?;
        assert!(leftover.is_none());

        Ok(())
    }

    /// A reader built with `from_path` reads records from a file on disk.
    #[test]
    fn test_read_from_file() -> TestResult {
        // Write the fixture to a temporary file.
        let csv_content = "city,country,pop\nParis,France,2161000\nLyon,France,513275";
        let mut temp_file = NamedTempFile::new()?;
        temp_file.write_all(csv_content.as_bytes())?;

        let reader = CsvItemReaderBuilder::<City>::new()
            .has_headers(true)
            .from_path(temp_file.path());

        let paris: City = reader.read()?.unwrap();
        let lyon: City = reader.read()?.unwrap();

        assert_eq!((paris.city.as_str(), paris.pop), ("Paris", 2161000));
        assert_eq!((lyon.city.as_str(), lyon.pop), ("Lyon", 513275));

        Ok(())
    }

    /// Custom delimiter (semicolon) and terminator (LF) are honoured.
    #[test]
    fn test_different_csv_formats() -> TestResult {
        let data = "city;country;pop\nBerlin;Germany;3645000\nMunich;Germany;1472000";

        let builder = CsvItemReaderBuilder::<City>::new()
            .has_headers(true)
            .delimiter(b';')
            .terminator(Terminator::Any(b'\n'));
        let reader = builder.from_reader(data.as_bytes());

        let berlin: City = reader.read()?.unwrap();
        let munich: City = reader.read()?.unwrap();

        assert_eq!(berlin.city, "Berlin");
        assert_eq!(munich.city, "Munich");
        assert_eq!(berlin.country, "Germany");

        Ok(())
    }

    /// Without headers, every row (including the first) is data.
    #[test]
    fn test_no_headers() -> TestResult {
        #[derive(Debug, Deserialize, PartialEq)]
        struct Record {
            field1: String,
            field2: String,
            field3: u32,
        }

        let data = "Tokyo,Japan,13960000\nOsaka,Japan,2691000";

        let reader = CsvItemReaderBuilder::<Record>::new()
            .has_headers(false)
            .from_reader(data.as_bytes());

        let first: Record = reader.read()?.unwrap();
        let second: Record = reader.read()?.unwrap();

        let expected_first = Record {
            field1: "Tokyo".to_string(),
            field2: "Japan".to_string(),
            field3: 13960000,
        };
        let expected_second = Record {
            field1: "Osaka".to_string(),
            field2: "Japan".to_string(),
            field3: 2691000,
        };

        assert_eq!(first, expected_first);
        assert_eq!(second, expected_second);

        Ok(())
    }

    /// A field that cannot be parsed into the target type yields an error.
    #[test]
    fn test_deserialization_error() {
        // "not_a_number" isn't a valid u32
        let data = "city,country,pop\nMilan,Italy,not_a_number";

        let reader = CsvItemReaderBuilder::<City>::new()
            .has_headers(true)
            .from_reader(data.as_bytes());

        assert!(ItemReader::<City>::read(&reader).is_err());
    }

    /// Empty input produces no items and no error.
    #[test]
    fn test_empty_file() -> TestResult {
        let reader = CsvItemReaderBuilder::<City>::new()
            .has_headers(false)
            .from_reader("".as_bytes());

        let item: Option<City> = reader.read()?;
        assert!(item.is_none());

        Ok(())
    }

    /// Input consisting of only a header row produces no items.
    #[test]
    fn test_headers_only() -> TestResult {
        let reader = CsvItemReaderBuilder::<City>::new()
            .has_headers(true)
            .from_reader("city,country,pop".as_bytes());

        let item: Option<City> = reader.read()?;
        assert!(item.is_none());

        Ok(())
    }
}