simd_csv/
line_reader.rs

1use memchr::memchr;
2
3use std::io::{self, Read};
4
5use crate::buffer::ScratchBuffer;
6use crate::utils::trim_trailing_crlf;
7
8pub struct LineReader<R> {
9    inner: ScratchBuffer<R>,
10}
11
12impl<R: Read> LineReader<R> {
13    pub fn new(inner: R) -> Self {
14        Self {
15            inner: ScratchBuffer::new(inner),
16        }
17    }
18
19    pub fn with_capacity(capacity: usize, inner: R) -> Self {
20        Self {
21            inner: ScratchBuffer::with_capacity(capacity, inner),
22        }
23    }
24
25    pub fn count_lines(&mut self) -> io::Result<u64> {
26        let mut count: u64 = 0;
27        let mut current_is_empty = true;
28
29        loop {
30            let input = self.inner.fill_buf()?;
31            let len = input.len();
32
33            if len == 0 {
34                if !current_is_empty {
35                    count += 1;
36                }
37
38                return Ok(count);
39            }
40
41            match memchr(b'\n', input) {
42                None => {
43                    self.inner.consume(len);
44                    current_is_empty = false;
45                }
46                Some(pos) => {
47                    count += 1;
48                    self.inner.consume(pos + 1);
49                    current_is_empty = true;
50                }
51            };
52        }
53    }
54
55    pub fn read_line(&mut self) -> io::Result<Option<&[u8]>> {
56        self.inner.reset();
57
58        loop {
59            let input = self.inner.fill_buf()?;
60            let len = input.len();
61
62            if len == 0 {
63                if self.inner.has_something_saved() {
64                    return Ok(Some(trim_trailing_crlf(self.inner.saved())));
65                }
66
67                return Ok(None);
68            }
69
70            match memchr(b'\n', input) {
71                None => {
72                    self.inner.save();
73                }
74                Some(pos) => {
75                    let bytes = self.inner.flush(pos + 1);
76                    return Ok(Some(trim_trailing_crlf(bytes)));
77                }
78            };
79        }
80    }
81}
82
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use super::*;

    /// Drains `data` through `read_line`, copying every returned line.
    fn collect_lines(data: &[u8]) -> io::Result<Vec<Vec<u8>>> {
        let mut reader = LineReader::new(Cursor::new(data));
        let mut collected = Vec::new();

        while let Some(line) = reader.read_line()? {
            collected.push(line.to_vec());
        }

        Ok(collected)
    }

    /// Checks that `read_line` yields the expected lines and that
    /// `count_lines` reports the same number of lines for each input.
    #[test]
    fn test_read_line() -> io::Result<()> {
        let cases: &[(&[u8], Vec<&[u8]>)] = &[
            (b"", vec![]),
            (b"test", vec![b"test"]),
            (
                b"hello\nwhatever\r\nbye!",
                vec![b"hello", b"whatever", b"bye!"],
            ),
            (
                b"hello\nwhatever\nbye!\n",
                vec![b"hello", b"whatever", b"bye!"],
            ),
            (
                b"hello\nwhatever\r\nbye!\n\n\r\n\n",
                vec![b"hello", b"whatever", b"bye!", b"", b"", b""],
            ),
        ];

        for (data, expected) in cases {
            assert_eq!(collect_lines(*data)?, *expected);

            // A fresh reader over the same bytes must count as many lines as
            // `read_line` produced.
            let line_count = LineReader::new(Cursor::new(*data)).count_lines()?;

            assert_eq!(line_count, expected.len() as u64);
        }

        Ok(())
    }
}