spring_batch_rs/item/csv/csv_reader.rs
1use csv::{ReaderBuilder, StringRecordsIntoIter, Terminator, Trim};
2use serde::de::DeserializeOwned;
3use std::{cell::RefCell, fs::File, io::Read, marker::PhantomData, path::Path};
4
5use crate::{
6 core::item::{ItemReader, ItemReaderResult},
7 error::BatchError,
8};
9
10/// A CSV item reader that implements the `ItemReader` trait.
11///
12/// This reader deserializes CSV data into Rust structs row by row
13/// using Serde's deserialization capabilities. It can process CSV
14/// data from files, strings, or any source implementing the `Read` trait.
15///
16/// # Type Parameters
17///
18/// - `R`: The type of reader providing the CSV data. Must implement `Read`.
19///
20/// # Implementation Details
21///
22/// - Uses a `RefCell` to provide interior mutability for the CSV record iterator
23/// - Requires `DeserializeOwned` for types that can be deserialized from CSV rows
24/// - Automatically converts CSV parsing errors into Spring Batch errors
25/// - Allows streaming data processing without loading the entire file into memory
26///
27/// # Examples
28///
29/// ```
30/// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
31/// use spring_batch_rs::core::item::ItemReader;
32/// use serde::Deserialize;
33///
34/// #[derive(Debug, Deserialize)]
35/// struct Record {
36/// name: String,
37/// value: i32,
38/// }
39///
40/// // Create a CSV string
41/// let data = "\
42/// name,value
43/// foo,123
44/// bar,456
45/// ";
46///
47/// // Build a reader
48/// let reader = CsvItemReaderBuilder::<Record>::new()
49/// .has_headers(true)
50/// .from_reader(data.as_bytes());
51///
52/// // Read the first record
53/// let record: Record = reader.read().unwrap().unwrap();
54/// assert_eq!(record.name, "foo");
55/// assert_eq!(record.value, 123);
56///
57/// // Read the second record
58/// let record: Record = reader.read().unwrap().unwrap();
59/// assert_eq!(record.name, "bar");
60/// assert_eq!(record.value, 456);
61///
62/// // No more records - explicitly use Record type again
63/// assert!(ItemReader::<Record>::read(&reader).unwrap().is_none());
64/// ```
65pub struct CsvItemReader<R: Read> {
66 /// Iterator over the CSV records
67 ///
68 /// Uses `RefCell` to provide interior mutability so we can iterate
69 /// through records while keeping the `read` method signature compatible
70 /// with the `ItemReader` trait.
71 records: RefCell<StringRecordsIntoIter<R>>,
72}
73
74impl<I: DeserializeOwned, R: Read> ItemReader<I> for CsvItemReader<R> {
75 /// Reads the next item from the CSV file.
76 ///
77 /// This method reads and deserializes the next row from the CSV source.
78 /// The row is converted to the specified type `T` using Serde's deserialization.
79 ///
80 /// # Deserialization Process
81 ///
82 /// 1. Gets the next record from the CSV iterator
83 /// 2. If no more records, returns `Ok(None)`
84 /// 3. Deserializes the record to type `T` using serde
85 /// 4. Wraps errors in the Spring Batch error system
86 ///
87 /// # Returns
88 /// - `Ok(Some(record))` if a record is successfully read
89 /// - `Ok(None)` if there are no more records to read
90 /// - `Err(BatchError::ItemReader(error))` if an error occurs during reading or deserialization
91 ///
92 /// # Examples
93 ///
94 /// ```
95 /// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
96 /// use spring_batch_rs::core::item::ItemReader;
97 /// use serde::Deserialize;
98 ///
99 /// #[derive(Debug, Deserialize)]
100 /// struct Person {
101 /// name: String,
102 /// age: u8,
103 /// }
104 ///
105 /// let data = "name,age\nAlice,30\nBob,25";
106 /// let reader = CsvItemReaderBuilder::<Person>::new()
107 /// .has_headers(true)
108 /// .from_reader(data.as_bytes());
109 ///
110 /// // Read all people
111 /// let mut people: Vec<Person> = Vec::new();
112 /// while let Some(person) = reader.read().unwrap() {
113 /// people.push(person);
114 /// }
115 ///
116 /// assert_eq!(people.len(), 2);
117 /// assert_eq!(people[0].name, "Alice");
118 /// assert_eq!(people[0].age, 30);
119 /// ```
120 fn read(&self) -> ItemReaderResult<I> {
121 // Try to get the next CSV record from the iterator
122 if let Some(result) = self.records.borrow_mut().next() {
123 match result {
124 Ok(string_record) => {
125 // Attempt to deserialize the record to type T
126 let result: Result<I, _> = string_record.deserialize(None);
127
128 match result {
129 Ok(record) => Ok(Some(record)),
130 Err(error) => Err(BatchError::ItemReader(error.to_string())),
131 }
132 }
133 Err(error) => Err(BatchError::ItemReader(error.to_string())),
134 }
135 } else {
136 // No more records in the CSV file
137 Ok(None)
138 }
139 }
140}
141
142/// A builder for configuring CSV item reading.
143///
144/// This builder allows you to customize the CSV reading behavior,
145/// including delimiter, terminator, and header handling.
146///
147/// # Design Pattern
148///
149/// This struct implements the Builder pattern, which allows for fluent, chainable
150/// configuration of a `CsvItemReader` before creation. Each method returns `self`
151/// to allow method chaining.
152///
153/// # Default Configuration
154///
155/// - Delimiter: comma (,)
156/// - Terminator: CRLF (Windows-style line endings)
157/// - Headers: disabled
158/// - Trimming: All fields trimmed
159///
160/// # Examples
161///
162/// ```
163/// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
164/// use spring_batch_rs::core::item::ItemReader;
165/// use serde::Deserialize;
166/// use csv::Terminator;
167///
168/// #[derive(Deserialize)]
169/// struct Person {
170/// name: String,
171/// age: u8,
172/// }
173///
174/// // Custom CSV configuration
175/// let reader = CsvItemReaderBuilder::<Person>::new()
176/// .delimiter(b';') // Use semicolon as delimiter
177/// .terminator(Terminator::Any(b'\n')) // Unix line endings
178/// .has_headers(true) // First row contains headers
179/// .from_reader("name;age\nAlice;30".as_bytes());
180/// ```
181#[derive(Default)]
182pub struct CsvItemReaderBuilder<I> {
183 /// The delimiter character (default: comma ',')
184 delimiter: u8,
185 /// The line terminator (default: CRLF)
186 terminator: Terminator,
187 /// Whether the CSV has headers (default: false)
188 has_headers: bool,
189 _pd: PhantomData<I>,
190}
191
192impl<I> CsvItemReaderBuilder<I> {
193 /// Creates a new `CsvItemReaderBuilder` with default configuration.
194 ///
195 /// Default settings:
196 /// - Delimiter: comma (,)
197 /// - Terminator: CRLF (Windows-style line endings)
198 /// - Headers: disabled
199 ///
200 /// # Examples
201 ///
202 /// ```
203 /// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
204 /// use serde::Deserialize;
205 ///
206 /// #[derive(Deserialize)]
207 /// struct Record {
208 /// field: String,
209 /// }
210 ///
211 /// let builder = CsvItemReaderBuilder::<Record>::new();
212 /// ```
213 pub fn new() -> Self {
214 Self {
215 delimiter: b',',
216 terminator: Terminator::CRLF,
217 has_headers: false,
218 _pd: PhantomData,
219 }
220 }
221
222 /// Sets the delimiter character for the CSV parsing.
223 ///
224 /// # Parameters
225 /// - `delimiter`: The character to use as a field delimiter
226 ///
227 /// # Examples
228 ///
229 /// ```
230 /// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
231 /// use serde::Deserialize;
232 ///
233 /// #[derive(Deserialize)]
234 /// struct Record {
235 /// field: String,
236 /// }
237 ///
238 /// // Use tab as delimiter
239 /// let builder = CsvItemReaderBuilder::<Record>::new()
240 /// .delimiter(b'\t');
241 ///
242 /// // Use semicolon as delimiter
243 /// let builder = CsvItemReaderBuilder::<Record>::new()
244 /// .delimiter(b';');
245 /// ```
246 pub fn delimiter(mut self, delimiter: u8) -> Self {
247 self.delimiter = delimiter;
248 self
249 }
250
251 /// Sets the line terminator for the CSV parsing.
252 ///
253 /// # Parameters
254 /// - `terminator`: The line terminator to use
255 ///
256 /// # Terminator Options
257 ///
258 /// - `Terminator::CRLF`: Windows-style line endings (default)
259 /// - `Terminator::Any(byte)`: Custom terminator, often `b'\n'` for Unix-style
260 ///
261 /// # Examples
262 ///
263 /// ```
264 /// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
265 /// use csv::Terminator;
266 /// use serde::Deserialize;
267 ///
268 /// #[derive(Deserialize)]
269 /// struct Record {
270 /// field: String,
271 /// }
272 ///
273 /// // Use Unix-style line endings (LF)
274 /// let builder = CsvItemReaderBuilder::<Record>::new()
275 /// .terminator(Terminator::Any(b'\n'));
276 /// ```
277 pub fn terminator(mut self, terminator: Terminator) -> Self {
278 self.terminator = terminator;
279 self
280 }
281
282 /// Sets whether the CSV file has headers.
283 ///
284 /// When enabled, the first row is treated as headers and is not returned
285 /// as part of the data. The header names can be used to match fields in
286 /// the deserialization process.
287 ///
288 /// # Parameters
289 /// - `yes`: Whether headers are present
290 ///
291 /// # Deserialization Impact
292 ///
293 /// When enabled, column names from headers can be matched to struct field names
294 /// during deserialization. This is often more robust than relying on column order.
295 ///
296 /// # Examples
297 ///
298 /// ```
299 /// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
300 /// use serde::Deserialize;
301 ///
302 /// #[derive(Deserialize)]
303 /// struct Record {
304 /// field: String,
305 /// }
306 ///
307 /// // Enable headers (first row is column names)
308 /// let builder = CsvItemReaderBuilder::<Record>::new()
309 /// .has_headers(true);
310 ///
311 /// // Disable headers (all rows are data)
312 /// let builder = CsvItemReaderBuilder::<Record>::new()
313 /// .has_headers(false);
314 /// ```
315 pub fn has_headers(mut self, yes: bool) -> Self {
316 self.has_headers = yes;
317 self
318 }
319
320 /// Creates a `CsvItemReader` from a reader.
321 ///
322 /// This allows reading CSV data from any source that implements the `Read` trait,
323 /// such as files, strings, or network connections.
324 ///
325 /// # Parameters
326 /// - `rdr`: The reader containing CSV data
327 ///
328 /// # Configuration Applied
329 ///
330 /// The following configurations are applied:
331 /// - Trims all whitespace from fields
332 /// - Uses specified delimiter (default: comma)
333 /// - Uses specified terminator (default: CRLF)
334 /// - Handles headers according to configuration
335 /// - Strict parsing (not flexible) to identify formatting issues
336 ///
337 /// # Examples
338 ///
339 /// ```
340 /// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
341 /// use spring_batch_rs::core::item::ItemReader;
342 /// use serde::Deserialize;
343 /// use std::io::Cursor;
344 ///
345 /// #[derive(Deserialize)]
346 /// struct Record {
347 /// id: u32,
348 /// name: String,
349 /// }
350 ///
351 /// // Read from a string
352 /// let data = "id,name\n1,Alice\n2,Bob";
353 /// let reader = CsvItemReaderBuilder::<Record>::new()
354 /// .has_headers(true)
355 /// .from_reader(data.as_bytes());
356 ///
357 /// // Or read from a Cursor
358 /// let cursor = Cursor::new("id,name\n1,Alice\n2,Bob");
359 /// let reader = CsvItemReaderBuilder::<Record>::new()
360 /// .has_headers(true)
361 /// .from_reader(cursor);
362 /// ```
363 pub fn from_reader<R: Read>(self, rdr: R) -> CsvItemReader<R> {
364 // Configure the CSV reader with builder options
365 let rdr = ReaderBuilder::new()
366 .trim(Trim::All) // Trim whitespace from all fields
367 .delimiter(self.delimiter)
368 .terminator(self.terminator)
369 .has_headers(self.has_headers)
370 .flexible(false) // Use strict parsing to catch formatting errors
371 .from_reader(rdr);
372
373 // Convert to a record iterator
374 let records = rdr.into_records();
375
376 CsvItemReader {
377 records: RefCell::new(records),
378 }
379 }
380
381 /// Creates a `CsvItemReader` from a file path.
382 ///
383 /// # Parameters
384 /// - `path`: The path to the CSV file
385 ///
386 /// # Returns
387 /// A new `CsvItemReader` configured to read from the specified file
388 ///
389 /// # Panics
390 /// Panics if the file cannot be opened
391 ///
392 /// # Error Handling
393 ///
394 /// This method panics immediately if the file cannot be opened, which is appropriate
395 /// for initialization failures. Subsequent reading errors are returned as `Result` values
396 /// from the `read` method.
397 ///
398 /// # Examples
399 ///
400 /// ```no_run
401 /// use spring_batch_rs::item::csv::csv_reader::CsvItemReaderBuilder;
402 /// use spring_batch_rs::core::item::ItemReader;
403 /// use serde::Deserialize;
404 ///
405 /// #[derive(Deserialize)]
406 /// struct Record {
407 /// id: u32,
408 /// name: String,
409 /// }
410 ///
411 /// // Read from a file
412 /// let reader = CsvItemReaderBuilder::<Record>::new()
413 /// .has_headers(true)
414 /// .from_path("data.csv");
415 ///
416 /// // Process records
417 /// let mut records: Vec<Record> = Vec::new();
418 /// while let Some(record) = ItemReader::<Record>::read(&reader).unwrap() {
419 /// println!("ID: {}, Name: {}", record.id, record.name);
420 /// records.push(record);
421 /// }
422 /// ```
423 pub fn from_path<R: AsRef<Path>>(self, path: R) -> CsvItemReader<File> {
424 // Configure the CSV reader with builder options
425 let rdr = ReaderBuilder::new()
426 .trim(Trim::All) // Trim whitespace from all fields
427 .delimiter(self.delimiter)
428 .terminator(self.terminator)
429 .has_headers(self.has_headers)
430 .flexible(false) // Use strict parsing to catch formatting errors
431 .from_path(path);
432
433 // Unwrap here is appropriate since file opening is an initialization step
434 // If it fails, we want to fail fast rather than returning an error
435 let records = rdr.unwrap().into_records();
436
437 CsvItemReader {
438 records: RefCell::new(records),
439 }
440 }
441}
442
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::item::ItemReader;
    use csv::StringRecord;
    use serde::Deserialize;
    use std::error::Error;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Row type shared by the tests that read city data.
    #[derive(Debug, Deserialize, PartialEq)]
    struct City {
        city: String,
        country: String,
        pop: u32,
    }

    /// Shorthand for building the expected `City` values.
    fn city(name: &str, country: &str, pop: u32) -> City {
        City {
            city: name.to_string(),
            country: country.to_string(),
            pop,
        }
    }

    /// The raw record iterator skips the header row and yields every data
    /// row with surrounding whitespace trimmed.
    #[test]
    fn should_parse_string_records_with_headers() -> Result<(), Box<dyn Error>> {
        // Data rows carry leading whitespace on purpose; the reader trims it.
        let data = "city,country,pop\n Boston,United States,4628910\n Concord,United States,42695";

        let reader = CsvItemReaderBuilder::<City>::new()
            .has_headers(true)
            .delimiter(b',')
            .from_reader(data.as_bytes());

        let parsed: Vec<StringRecord> = reader
            .records
            .into_inner()
            .collect::<Result<Vec<StringRecord>, csv::Error>>()?;

        let expected = vec![
            vec!["Boston", "United States", "4628910"],
            vec!["Concord", "United States", "42695"],
        ];
        assert_eq!(parsed, expected);

        Ok(())
    }

    /// Rows are deserialized into typed values through the `ItemReader` trait.
    #[test]
    fn test_deserialize_typed_records() -> Result<(), Box<dyn Error>> {
        let data = "city,country,pop\n Boston,United States,4628910\n Concord,United States,42695";

        let reader = CsvItemReaderBuilder::<City>::new()
            .has_headers(true)
            .from_reader(data.as_bytes());

        let first: City = reader.read()?.unwrap();
        assert_eq!(first, city("Boston", "United States", 4628910));

        let second: City = reader.read()?.unwrap();
        assert_eq!(second, city("Concord", "United States", 42695));

        // The source is exhausted after the two data rows.
        let third = ItemReader::<City>::read(&reader)?;
        assert!(third.is_none());

        Ok(())
    }

    /// A reader built with `from_path` reads rows from a file on disk.
    #[test]
    fn test_read_from_file() -> Result<(), Box<dyn Error>> {
        let csv_content = "city,country,pop\nParis,France,2161000\nLyon,France,513275";

        // Write the fixture to a temporary file.
        let mut temp_file = NamedTempFile::new()?;
        temp_file.write_all(csv_content.as_bytes())?;

        let reader = CsvItemReaderBuilder::<City>::new()
            .has_headers(true)
            .from_path(temp_file.path());

        let paris: City = reader.read()?.unwrap();
        let lyon: City = reader.read()?.unwrap();

        assert_eq!(paris.city, "Paris");
        assert_eq!(lyon.city, "Lyon");
        assert_eq!(paris.pop, 2161000);
        assert_eq!(lyon.pop, 513275);

        Ok(())
    }

    /// A custom delimiter and terminator are honoured.
    #[test]
    fn test_different_csv_formats() -> Result<(), Box<dyn Error>> {
        // Semicolon-separated fields, LF-terminated rows.
        let data = "city;country;pop\nBerlin;Germany;3645000\nMunich;Germany;1472000";

        let builder = CsvItemReaderBuilder::<City>::new()
            .has_headers(true)
            .delimiter(b';')
            .terminator(Terminator::Any(b'\n'));
        let reader = builder.from_reader(data.as_bytes());

        let berlin: City = reader.read()?.unwrap();
        let munich: City = reader.read()?.unwrap();

        assert_eq!(berlin.city, "Berlin");
        assert_eq!(munich.city, "Munich");
        assert_eq!(berlin.country, "Germany");

        Ok(())
    }

    /// Without headers, the very first row is already returned as data.
    #[test]
    fn test_no_headers() -> Result<(), Box<dyn Error>> {
        #[derive(Debug, Deserialize, PartialEq)]
        struct Record {
            field1: String,
            field2: String,
            field3: u32,
        }

        let data = "Tokyo,Japan,13960000\nOsaka,Japan,2691000";

        let reader = CsvItemReaderBuilder::<Record>::new()
            .has_headers(false)
            .from_reader(data.as_bytes());

        let first: Record = ItemReader::<Record>::read(&reader)?.unwrap();
        let second: Record = ItemReader::<Record>::read(&reader)?.unwrap();

        let expected_first = Record {
            field1: "Tokyo".to_string(),
            field2: "Japan".to_string(),
            field3: 13960000,
        };
        let expected_second = Record {
            field1: "Osaka".to_string(),
            field2: "Japan".to_string(),
            field3: 2691000,
        };

        assert_eq!(first, expected_first);
        assert_eq!(second, expected_second);

        Ok(())
    }

    /// A field that cannot be converted to the target type yields an error.
    #[test]
    fn test_deserialization_error() {
        // "not_a_number" cannot be parsed as the `u32` population.
        let data = "city,country,pop\nMilan,Italy,not_a_number";

        let reader = CsvItemReaderBuilder::<City>::new()
            .has_headers(true)
            .from_reader(data.as_bytes());

        assert!(ItemReader::<City>::read(&reader).is_err());
    }

    /// Empty input yields no items and no error.
    #[test]
    fn test_empty_file() -> Result<(), Box<dyn Error>> {
        let reader = CsvItemReaderBuilder::<City>::new()
            .has_headers(false)
            .from_reader("".as_bytes());

        let item = ItemReader::<City>::read(&reader)?;
        assert!(item.is_none());

        Ok(())
    }

    /// Input that consists of a header row only yields no items.
    #[test]
    fn test_headers_only() -> Result<(), Box<dyn Error>> {
        let reader = CsvItemReaderBuilder::<City>::new()
            .has_headers(true)
            .from_reader("city,country,pop".as_bytes());

        let item = ItemReader::<City>::read(&reader)?;
        assert!(item.is_none());

        Ok(())
    }
}
659}