simd_csv/
line_reader.rs

1use memchr::memchr;
2
3use std::io::{self, BufReader, Read};
4
5use crate::buffer::ScratchBuffer;
6use crate::utils::trim_trailing_crlf;
7
8pub struct LineReader<R> {
9    inner: ScratchBuffer<R>,
10}
11
12impl<R: Read> LineReader<R> {
13    pub fn new(inner: R) -> Self {
14        Self {
15            inner: ScratchBuffer::new(inner),
16        }
17    }
18
19    pub fn with_capacity(capacity: usize, inner: R) -> Self {
20        Self {
21            inner: ScratchBuffer::with_capacity(capacity, inner),
22        }
23    }
24
25    pub fn count_lines(&mut self) -> io::Result<u64> {
26        let mut count: u64 = 0;
27        let mut current_is_empty = true;
28
29        loop {
30            let input = self.inner.fill_buf()?;
31            let len = input.len();
32
33            if len == 0 {
34                if !current_is_empty {
35                    count += 1;
36                }
37
38                return Ok(count);
39            }
40
41            match memchr(b'\n', input) {
42                None => {
43                    self.inner.consume(len);
44                    current_is_empty = false;
45                }
46                Some(pos) => {
47                    count += 1;
48                    self.inner.consume(pos + 1);
49                    current_is_empty = true;
50                }
51            };
52        }
53    }
54
55    pub fn read_line(&mut self) -> io::Result<Option<&[u8]>> {
56        self.inner.reset();
57
58        loop {
59            let input = self.inner.fill_buf()?;
60            let len = input.len();
61
62            if len == 0 {
63                if self.inner.has_something_saved() {
64                    return Ok(Some(trim_trailing_crlf(self.inner.saved())));
65                }
66
67                return Ok(None);
68            }
69
70            match memchr(b'\n', input) {
71                None => {
72                    self.inner.save();
73                }
74                Some(pos) => {
75                    let bytes = self.inner.flush(pos + 1);
76                    return Ok(Some(trim_trailing_crlf(bytes)));
77                }
78            };
79        }
80    }
81
82    #[inline(always)]
83    pub fn position(&self) -> u64 {
84        self.inner.position()
85    }
86
87    #[inline(always)]
88    pub fn into_bufreader(self) -> BufReader<R> {
89        self.inner.into_bufreader()
90    }
91
92    #[inline(always)]
93    pub fn into_inner(self) -> R {
94        self.inner.into_bufreader().into_inner()
95    }
96}
97
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use super::*;

    /// Runs `read_line` and `count_lines` over the same inputs and checks
    /// that both agree with the expected list of lines.
    #[test]
    fn test_read_line() -> io::Result<()> {
        // Each case pairs a raw input with the lines it should produce.
        let cases: &[(&[u8], Vec<&[u8]>)] = &[
            // Empty input yields no line at all.
            (b"", vec![]),
            // A single unterminated line.
            (b"test", vec![b"test"]),
            // Mixed LF / CRLF terminators, last line unterminated.
            (
                b"hello\nwhatever\r\nbye!",
                vec![b"hello", b"whatever", b"bye!"],
            ),
            // A trailing newline does not produce an extra empty line.
            (
                b"hello\nwhatever\nbye!\n",
                vec![b"hello", b"whatever", b"bye!"],
            ),
            // Consecutive terminators produce empty lines.
            (
                b"hello\nwhatever\r\nbye!\n\n\r\n\n",
                vec![b"hello", b"whatever", b"bye!", b"", b"", b""],
            ),
        ];

        for (input, expected_lines) in cases {
            // Collect every line; each one is copied because the returned
            // slice borrows from the reader.
            let mut line_reader = LineReader::new(Cursor::new(input));
            let mut collected = Vec::new();

            while let Some(line) = line_reader.read_line()? {
                collected.push(line.to_vec());
            }

            assert_eq!(collected, *expected_lines);

            // Counting on a fresh reader must match the number of lines read.
            let mut counting_reader = LineReader::new(Cursor::new(input));

            assert_eq!(counting_reader.count_lines()?, expected_lines.len() as u64);
        }

        Ok(())
    }
}