// fwf_rs/reader.rs

use std::{
    io::{BufRead, BufReader, Lines, Read},
    ops::Range,
};

use crate::ReaderError;
7
8#[derive(Debug)]
9pub struct Reader<R> {
10    lines: Lines<BufReader<R>>,
11    widths: Vec<usize>,
12    separator_length: usize,
13    flexible_width: bool,
14    header: Option<Record>,
15}
16
17impl<R> Reader<R>
18where
19    R: Read,
20{
21    pub fn new(
22        reader: R,
23        widths: Vec<usize>,
24        separator_length: usize,
25        flexible_width: bool,
26        has_header: bool,
27    ) -> Result<Self, ReaderError> {
28        let mut lines = BufReader::new(reader).lines();
29        let header = {
30            if has_header {
31                let line = lines.next().ok_or(ReaderError::EmptyLine)??;
32                Some(Record::try_new(
33                    line,
34                    &widths,
35                    separator_length,
36                    flexible_width,
37                )?)
38            } else {
39                None
40            }
41        };
42        Ok(Self {
43            lines,
44            widths,
45            separator_length,
46            flexible_width,
47            header,
48        })
49    }
50
51    pub fn header(&self) -> Option<Record> {
52        self.header.clone()
53    }
54
55    pub fn records(self) -> RecordIter<R> {
56        RecordIter { reader: self }
57    }
58}
59
60#[derive(Debug)]
61pub struct RecordIter<R> {
62    reader: Reader<R>,
63}
64
65impl<R> RecordIter<R> {
66    pub fn header(&self) -> Option<Record> {
67        self.reader.header.clone()
68    }
69}
70
71impl<R> Iterator for RecordIter<R>
72where
73    R: Read,
74{
75    type Item = Result<Record, ReaderError>;
76
77    fn next(&mut self) -> Option<Self::Item> {
78        self.reader.lines.next().map(|result| {
79            Record::try_new(
80                result?,
81                &self.reader.widths,
82                self.reader.separator_length,
83                self.reader.flexible_width,
84            )
85        })
86    }
87}
88
89#[derive(Debug, Clone)]
90pub struct Record {
91    line: String,
92    ranges: Vec<Range<usize>>,
93}
94
95impl Record {
96    pub fn try_new(
97        line: String,
98        widths: &[usize],
99        sep_len: usize,
100        flexible_widths: bool,
101    ) -> Result<Self, ReaderError> {
102        if line.is_empty() {
103            Err(ReaderError::EmptyLine)
104        } else {
105            let mut start = 0;
106            let ranges = widths
107                .iter()
108                .copied()
109                .map(|w| {
110                    let rem = line.len() - start;
111                    match rem.cmp(&w) {
112                        std::cmp::Ordering::Less => {
113                            if flexible_widths {
114                                let rng = start..line.len();
115                                start = line.len();
116                                Ok(rng)
117                            } else {
118                                let err = ReaderError::WidthMismatch(start, w);
119                                start = line.len();
120                                Err(err)
121                            }
122                        }
123                        std::cmp::Ordering::Equal => {
124                            let rng = start..line.len();
125                            start = line.len();
126                            Ok(rng)
127                        }
128                        std::cmp::Ordering::Greater => line[start..]
129                            .char_indices()
130                            .nth(w)
131                            .map(|(i, _)| {
132                                let end = start + i;
133                                let rng = start..end;
134                                start = end + sep_len;
135                                rng
136                            })
137                            .ok_or(ReaderError::WidthMismatch(start, w)),
138                    }
139                })
140                .collect::<Result<Vec<_>, ReaderError>>()?;
141            Ok(Self { line, ranges })
142        }
143    }
144    pub fn get(&self, index: usize) -> Option<&str> {
145        self.ranges
146            .get(index)
147            .cloned()
148            .and_then(|range| self.line.get(range))
149    }
150
151    pub fn iter(&self) -> FwfFieldIter<'_> {
152        FwfFieldIter {
153            fwr: self,
154            index: 0,
155        }
156    }
157}
158
159#[derive(Debug, Clone)]
160pub struct FwfFieldIter<'a> {
161    fwr: &'a Record,
162    index: usize,
163}
164
165impl<'a> Iterator for FwfFieldIter<'a> {
166    type Item = &'a str;
167
168    fn next(&mut self) -> Option<Self::Item> {
169        self.fwr.get(self.index).inspect(|_| {
170            self.index += 1;
171        })
172    }
173}
174
#[cfg(test)]
mod tests {

    use super::*;
    use rand::RngExt;
    use rand::distr::Alphanumeric;
    use std::fs::File;
    use std::io::Cursor;
    use std::io::Write;
    use std::path::PathBuf;

    /// Fixture file in the system temp directory.
    ///
    /// The file is removed when the guard is dropped, so a failing assertion
    /// does not leave it behind. Declare the guard before the reader so it is
    /// dropped last.
    struct TestFile {
        path: PathBuf,
    }

    impl TestFile {
        /// Creates a uniquely named file containing exactly `content`.
        fn create(content: &str) -> Result<Self, Box<dyn std::error::Error>> {
            let suffix: String = rand::rng()
                .sample_iter(Alphanumeric)
                .take(16)
                .map(char::from)
                .collect();
            // Build the guard first so the file is cleaned up even if writing fails.
            let guard = Self {
                path: std::env::temp_dir().join(format!("test_fwf_file_{suffix}.txt")),
            };
            let mut file = File::create(&guard.path)?;
            write!(file, "{content}")?;
            Ok(guard)
        }

        /// Opens the fixture for reading.
        fn open(&self) -> File {
            File::open(&self.path).unwrap()
        }
    }

    impl Drop for TestFile {
        fn drop(&mut self) {
            // Best-effort cleanup; a failure here must not mask the test result.
            let _ = std::fs::remove_file(&self.path);
        }
    }

    /// Builds a `RecordIter` over in-memory `data` with three 3-wide fields
    /// and no header.
    fn record_iter(
        data: &'static str,
        separator_length: usize,
        flexible_width: bool,
    ) -> RecordIter<Cursor<&'static [u8]>> {
        RecordIter {
            reader: Reader {
                lines: BufReader::new(Cursor::new(data.as_bytes())).lines(),
                widths: vec![3, 3, 3],
                separator_length,
                flexible_width,
                header: None,
            },
        }
    }

    #[test]
    fn test_fwf_file_reader_with_header() {
        let file =
            TestFile::create("header1header2header3\n123456789\n987    654    321    \n").unwrap();
        let reader = Reader::new(file.open(), vec![7, 7, 7], 0, false, true).unwrap();

        let header = reader.header().unwrap();
        assert_eq!(header.get(0), Some("header1"));
        assert_eq!(header.get(1), Some("header2"));
        assert_eq!(header.get(2), Some("header3"));

        let mut records = reader.records();
        // "123456789" is too short for a second 7-wide field.
        assert!(matches!(
            records.next().unwrap().unwrap_err(),
            ReaderError::WidthMismatch(7, 7)
        ));

        let record2 = records.next().unwrap().unwrap();
        assert_eq!(record2.get(0), Some("987    "));
        assert_eq!(record2.get(1), Some("654    "));
        assert_eq!(record2.get(2), Some("321    "));

        assert!(records.next().is_none());
    }

    #[test]
    fn test_fwf_file_reader_without_header() {
        let file = TestFile::create("123456789\n987654321\n").unwrap();
        let reader = Reader::new(file.open(), vec![3, 3, 3], 0, false, false).unwrap();

        assert!(reader.header().is_none());

        let mut records = reader.records();
        let record1 = records.next().unwrap().unwrap();
        assert_eq!(record1.get(0), Some("123"));
        assert_eq!(record1.get(1), Some("456"));
        assert_eq!(record1.get(2), Some("789"));

        let record2 = records.next().unwrap().unwrap();
        assert_eq!(record2.get(0), Some("987"));
        assert_eq!(record2.get(1), Some("654"));
        assert_eq!(record2.get(2), Some("321"));

        assert!(records.next().is_none());
    }

    #[test]
    fn test_fwf_file_reader_with_separator() {
        let file = TestFile::create("123-456-789\n987-654-321\n").unwrap();
        let reader = Reader::new(file.open(), vec![3, 3, 3], 1, false, false).unwrap();

        let mut records = reader.records();
        let record1 = records.next().unwrap().unwrap();
        assert_eq!(record1.get(0), Some("123"));
        assert_eq!(record1.get(1), Some("456"));
        assert_eq!(record1.get(2), Some("789"));

        let record2 = records.next().unwrap().unwrap();
        assert_eq!(record2.get(0), Some("987"));
        assert_eq!(record2.get(1), Some("654"));
        assert_eq!(record2.get(2), Some("321"));

        assert!(records.next().is_none());
    }

    #[test]
    fn test_fwf_file_reader_with_flexible_width() {
        let file = TestFile::create("123456\n987654321\n").unwrap();
        let reader = Reader::new(file.open(), vec![3, 3, 3], 0, true, false).unwrap();

        let mut records = reader.records();
        let record1 = records.next().unwrap().unwrap();
        assert_eq!(record1.get(0), Some("123"));
        assert_eq!(record1.get(1), Some("456"));
        assert_eq!(record1.get(2), Some("")); // Last field is missing

        let record2 = records.next().unwrap().unwrap();
        assert_eq!(record2.get(0), Some("987"));
        assert_eq!(record2.get(1), Some("654"));
        assert_eq!(record2.get(2), Some("321"));

        assert!(records.next().is_none());
    }

    #[test]
    fn test_fwf_file_reader_empty_file() {
        let file = TestFile::create("").unwrap();
        let reader = Reader::new(file.open(), vec![3, 3, 3], 0, false, false).unwrap();

        assert!(reader.header().is_none());

        let mut records = reader.records();
        assert!(records.next().is_none());
    }

    #[test]
    fn test_fwf_record_iter_basic() {
        let mut iter = record_iter("123456789\n987654321\n", 0, false);

        let record1 = iter.next().unwrap().unwrap();
        assert_eq!(record1.line, "123456789");
        assert_eq!(record1.ranges, vec![0..3, 3..6, 6..9]);

        let record2 = iter.next().unwrap().unwrap();
        assert_eq!(record2.line, "987654321");
        assert_eq!(record2.ranges, vec![0..3, 3..6, 6..9]);

        assert!(iter.next().is_none());
    }

    #[test]
    fn test_fwf_record_iter_with_flexible_width() {
        let mut iter = record_iter("123456\n987654321\n", 0, true);

        let record1 = iter.next().unwrap().unwrap();
        assert_eq!(record1.line, "123456");
        assert_eq!(record1.ranges, vec![0..3, 3..6, 6..6]); // Last range should be empty

        let record2 = iter.next().unwrap().unwrap();
        assert_eq!(record2.line, "987654321");
        assert_eq!(record2.ranges, vec![0..3, 3..6, 6..9]);

        assert!(iter.next().is_none());
    }

    #[test]
    fn test_fwf_record_iter_with_separator() {
        let mut iter = record_iter("123-456-789\n987-654-321\n", 1, false);

        let record1 = iter.next().unwrap().unwrap();
        assert_eq!(record1.line, "123-456-789");
        assert_eq!(record1.ranges, vec![0..3, 4..7, 8..11]);

        let record2 = iter.next().unwrap().unwrap();
        assert_eq!(record2.line, "987-654-321");
        assert_eq!(record2.ranges, vec![0..3, 4..7, 8..11]);

        assert!(iter.next().is_none());
    }

    #[test]
    fn test_fwf_record_iter_empty_line() {
        let mut iter = record_iter("123456789\n\n987654321\n", 0, false);

        let record1 = iter.next().unwrap().unwrap();
        assert_eq!(record1.line, "123456789");
        assert_eq!(record1.ranges, vec![0..3, 3..6, 6..9]);

        // The blank line is reported as an error but does not end iteration.
        let record2 = iter.next().unwrap();
        assert!(matches!(record2.unwrap_err(), ReaderError::EmptyLine));

        let record3 = iter.next().unwrap().unwrap();
        assert_eq!(record3.line, "987654321");
        assert_eq!(record3.ranges, vec![0..3, 3..6, 6..9]);

        assert!(iter.next().is_none());
    }

    #[test]
    fn test_fwf_record_iter_width_mismatch() {
        let mut iter = record_iter("12345\n987654321\n", 0, false);

        let record1 = iter.next().unwrap();
        assert!(matches!(
            record1.unwrap_err(),
            ReaderError::WidthMismatch(3, 3)
        ));

        let record2 = iter.next().unwrap().unwrap();
        assert_eq!(record2.line, "987654321");
        assert_eq!(record2.ranges, vec![0..3, 3..6, 6..9]);

        assert!(iter.next().is_none());
    }

    #[test]
    fn test_fwf_record_iter_end_of_file() {
        // No newline at the end
        let mut iter = record_iter("123456789", 0, false);

        let record1 = iter.next().unwrap().unwrap();
        assert_eq!(record1.line, "123456789");
        assert_eq!(record1.ranges, vec![0..3, 3..6, 6..9]);

        assert!(iter.next().is_none());
    }

    #[test]
    fn test_create_fwf_record() {
        let line = "123456789".to_string();

        let record = Record::try_new(line.clone(), &[3, 3, 3], 0, false).unwrap();

        assert_eq!(record.line, line);
        assert_eq!(record.ranges, vec![0..3, 3..6, 6..9]);
    }

    #[test]
    fn test_create_fwf_record_with_flexible_width() {
        let line = "1234567".to_string();

        let record = Record::try_new(line.clone(), &[3, 3, 3, 3], 0, true).unwrap();

        assert_eq!(record.line, line);
        assert_eq!(record.ranges, vec![0..3, 3..6, 6..7, 7..7]);
    }

    #[test]
    fn test_create_fwf_record_with_mismatch_width() {
        let record = Record::try_new("12345".to_string(), &[3, 3, 3], 0, false);

        assert!(matches!(
            record.unwrap_err(),
            ReaderError::WidthMismatch(3, 3)
        ));
    }

    #[test]
    fn test_create_fwf_record_with_separator() {
        let line = "123-456-789".to_string();

        let record = Record::try_new(line.clone(), &[3, 3, 3], 1, false).unwrap();

        assert_eq!(record.line, line);
        assert_eq!(record.ranges, vec![0..3, 4..7, 8..11]);
    }

    #[test]
    fn test_get_field_by_index() {
        let record = Record::try_new("123456789".to_string(), &[3, 3, 3], 0, false).unwrap();

        assert_eq!(record.get(0), Some("123"));
        assert_eq!(record.get(1), Some("456"));
        assert_eq!(record.get(2), Some("789"));
        assert_eq!(record.get(3), None);
    }

    #[test]
    fn test_iterate_over_fields() {
        let record = Record::try_new("123456789".to_string(), &[3, 3, 3], 0, false).unwrap();
        let fields: Vec<&str> = record.iter().collect();

        assert_eq!(fields, vec!["123", "456", "789"]);
    }

    #[test]
    fn test_empty_line() {
        let record = Record::try_new(String::new(), &[3, 3, 3], 0, false);

        assert!(matches!(record.unwrap_err(), ReaderError::EmptyLine));
    }
}