1use std::io::{BufRead, BufReader, Read, Seek, SeekFrom};
2
3use error::*;
4
/// Upper bound on how much of the input a sample covers.
///
/// The limit is applied by `SampleIter` as it reads the input line by line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SampleSize {
    /// Yield at most this many records (line-terminated lines).
    Records(usize),
    /// Yield records only while the running total of bytes read (line
    /// terminators included) does not exceed this many bytes.
    Bytes(usize),
    /// No limit: keep yielding records until the input is exhausted.
    All,
}
15
16pub fn take_sample_from_start<R>(reader: &mut R, sample_size: SampleSize) -> Result<SampleIter<R>>
17where
18 R: Read + Seek,
19{
20 reader.seek(SeekFrom::Start(0))?;
21 Ok(SampleIter::new(reader, sample_size))
22}
23
24pub struct SampleIter<'a, R: 'a + Read> {
25 reader: BufReader<&'a mut R>,
26 sample_size: SampleSize,
27 n_bytes: usize,
28 n_records: usize,
29 is_done: bool,
30}
31
32impl<'a, R: Read> SampleIter<'a, R> {
33 fn new(reader: &'a mut R, sample_size: SampleSize) -> SampleIter<'a, R> {
34 let buf_reader = BufReader::new(reader);
35 SampleIter {
36 reader: buf_reader,
37 sample_size,
38 n_bytes: 0,
39 n_records: 0,
40 is_done: false,
41 }
42 }
43}
44
45impl<'a, R: Read> Iterator for SampleIter<'a, R> {
46 type Item = Result<String>;
47
48 fn next(&mut self) -> Option<Result<String>> {
49 if self.is_done {
50 return None;
51 }
52
53 let mut output = String::new();
54 let n_bytes_read = match self.reader.read_line(&mut output) {
55 Ok(n_bytes_read) => n_bytes_read,
56 Err(e) => {
57 return Some(Err(e.into()));
58 }
59 };
60 if n_bytes_read == 0 {
61 self.is_done = true;
62 return None;
63 }
64 let last_byte = (output.as_ref() as &[u8])[output.len() - 1];
65 if last_byte != b'\n' && last_byte != b'\r' {
66 self.is_done = true;
69 return None;
70 } else {
71 output = output.trim_matches(|c| c == '\n' || c == '\r').into();
72 }
73 self.n_bytes += n_bytes_read;
74 self.n_records += 1;
75 match self.sample_size {
76 SampleSize::Records(max_records) => {
77 if self.n_records > max_records {
78 self.is_done = true;
79 return None;
80 }
81 }
82 SampleSize::Bytes(max_bytes) => {
83 if self.n_bytes > max_bytes {
84 self.is_done = true;
85 return None;
86 }
87 }
88 SampleSize::All => {}
89 }
90 Some(Ok(output))
91 }
92}