csv_sniffer/
sample.rs

1use std::io::{BufRead, BufReader, Read, Seek, SeekFrom};
2
3use error::*;
4
/// Argument used when calling `sample_size` on `Sniffer`.
///
/// Selects how much of the input is handed to the sniffer: a number of
/// records, a number of bytes, or everything.
///
/// The type is a small `Copy` value; it also implements `PartialEq`/`Eq` so
/// that callers can compare configured sample sizes directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SampleSize {
    /// Use a number of records as the size of the sample to sniff.
    Records(usize),
    /// Use a number of bytes as the size of the sample to sniff.
    Bytes(usize),
    /// Sniff the entire sample.
    All,
}
15
16pub fn take_sample_from_start<R>(reader: &mut R, sample_size: SampleSize) -> Result<SampleIter<R>>
17where
18    R: Read + Seek,
19{
20    reader.seek(SeekFrom::Start(0))?;
21    Ok(SampleIter::new(reader, sample_size))
22}
23
24pub struct SampleIter<'a, R: 'a + Read> {
25    reader: BufReader<&'a mut R>,
26    sample_size: SampleSize,
27    n_bytes: usize,
28    n_records: usize,
29    is_done: bool,
30}
31
32impl<'a, R: Read> SampleIter<'a, R> {
33    fn new(reader: &'a mut R, sample_size: SampleSize) -> SampleIter<'a, R> {
34        let buf_reader = BufReader::new(reader);
35        SampleIter {
36            reader: buf_reader,
37            sample_size,
38            n_bytes: 0,
39            n_records: 0,
40            is_done: false,
41        }
42    }
43}
44
45impl<'a, R: Read> Iterator for SampleIter<'a, R> {
46    type Item = Result<String>;
47
48    fn next(&mut self) -> Option<Result<String>> {
49        if self.is_done {
50            return None;
51        }
52
53        let mut output = String::new();
54        let n_bytes_read = match self.reader.read_line(&mut output) {
55            Ok(n_bytes_read) => n_bytes_read,
56            Err(e) => {
57                return Some(Err(e.into()));
58            }
59        };
60        if n_bytes_read == 0 {
61            self.is_done = true;
62            return None;
63        }
64        let last_byte = (output.as_ref() as &[u8])[output.len() - 1];
65        if last_byte != b'\n' && last_byte != b'\r' {
66            // non CR/LF-ended line
67            // line was cut off before ending, so we ignore it!
68            self.is_done = true;
69            return None;
70        } else {
71            output = output.trim_matches(|c| c == '\n' || c == '\r').into();
72        }
73        self.n_bytes += n_bytes_read;
74        self.n_records += 1;
75        match self.sample_size {
76            SampleSize::Records(max_records) => {
77                if self.n_records > max_records {
78                    self.is_done = true;
79                    return None;
80                }
81            }
82            SampleSize::Bytes(max_bytes) => {
83                if self.n_bytes > max_bytes {
84                    self.is_done = true;
85                    return None;
86                }
87            }
88            SampleSize::All => {}
89        }
90        Some(Ok(output))
91    }
92}