1use std::io::Read;
2
3use crate::buffer::ScratchBuffer;
4use crate::core::{CoreReader, ReadResult};
5use crate::error;
6use crate::utils::trim_trailing_crlf;
7
/// Configuration holder used to construct a [`Splitter`].
///
/// The builder stores the delimiter byte, the quote byte and an optional
/// buffer capacity. Its setter methods take `&mut self` and return
/// `&mut Self`, so calls can be chained before `from_reader`.
///
/// The type is `Debug` and `Clone` so that a configuration can be logged or
/// duplicated before being customised further.
#[derive(Debug, Clone)]
pub struct SplitterBuilder {
    /// Delimiter byte handed to `CoreReader::new`.
    delimiter: u8,
    /// Quote byte handed to `CoreReader::new`.
    quote: u8,
    /// Requested buffer capacity; `None` leaves the choice to
    /// `ScratchBuffer::with_optional_capacity`.
    buffer_capacity: Option<usize>,
}
13
14impl Default for SplitterBuilder {
15 fn default() -> Self {
16 Self {
17 delimiter: b',',
18 quote: b'"',
19 buffer_capacity: None,
20 }
21 }
22}
23
24impl SplitterBuilder {
25 pub fn new() -> Self {
26 Self::default()
27 }
28
29 pub fn with_capacity(capacity: usize) -> Self {
30 let mut splitter = Self::default();
31 splitter.buffer_capacity(capacity);
32 splitter
33 }
34
35 pub fn delimiter(&mut self, delimiter: u8) -> &mut Self {
36 self.delimiter = delimiter;
37 self
38 }
39
40 pub fn quote(&mut self, quote: u8) -> &mut Self {
41 self.quote = quote;
42 self
43 }
44
45 pub fn buffer_capacity(&mut self, capacity: usize) -> &mut Self {
46 self.buffer_capacity = Some(capacity);
47 self
48 }
49
50 pub fn from_reader<R: Read>(&self, reader: R) -> Splitter<R> {
51 Splitter {
52 buffer: ScratchBuffer::with_optional_capacity(self.buffer_capacity, reader),
53 inner: CoreReader::new(self.delimiter, self.quote),
54 }
55 }
56}
57
/// Splits the bytes produced by a reader `R` into records.
///
/// Instances are created through `Splitter::from_reader` or
/// `SplitterBuilder::from_reader`.
pub struct Splitter<R> {
    // Buffer wrapping the underlying reader; supplies the input slices that
    // are scanned and keeps track of what has been consumed.
    buffer: ScratchBuffer<R>,
    // Record-boundary scanner configured with the delimiter and quote bytes.
    inner: CoreReader,
}
62
63impl<R: Read> Splitter<R> {
64 pub fn from_reader(reader: R) -> Self {
65 SplitterBuilder::new().from_reader(reader)
66 }
67
68 #[inline(always)]
69 pub fn strip_bom(&mut self) -> error::Result<()> {
70 self.buffer.strip_bom()?;
71 Ok(())
72 }
73
74 pub fn count_records(&mut self) -> error::Result<u64> {
75 use ReadResult::*;
76
77 let mut count: u64 = 0;
78
79 loop {
80 let input = self.buffer.fill_buf()?;
81
82 let (result, pos) = self.inner.split_record(input);
83
84 self.buffer.consume(pos);
85
86 match result {
87 End => break,
88 InputEmpty | Cr | Lf => continue,
89 Record => {
90 count += 1;
91 }
92 };
93 }
94
95 Ok(count)
96 }
97
98 pub fn split_record(&mut self) -> error::Result<Option<&[u8]>> {
99 use ReadResult::*;
100
101 self.buffer.reset();
102
103 loop {
104 let input = self.buffer.fill_buf()?;
105
106 let (result, pos) = self.inner.split_record(input);
107
108 match result {
109 End => {
110 self.buffer.consume(pos);
111 return Ok(None);
112 }
113 Cr | Lf => {
114 self.buffer.consume(pos);
115 }
116 InputEmpty => {
117 self.buffer.save();
118 }
119 Record => {
120 return Ok(Some(trim_trailing_crlf(self.buffer.flush(pos))));
121 }
122 };
123 }
124 }
125}
126
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use super::*;

    /// Counts the records of `data` through `Splitter::count_records`, using
    /// a buffer of the given `capacity`.
    fn count_records(data: &str, capacity: usize) -> u64 {
        SplitterBuilder::with_capacity(capacity)
            .from_reader(Cursor::new(data))
            .count_records()
            .unwrap()
    }

    /// Counts the records of `data` by calling `Splitter::split_record`
    /// until it yields `None`, using a buffer of the given `capacity`.
    fn split_records(data: &str, capacity: usize) -> u64 {
        let mut splitter = SplitterBuilder::with_capacity(capacity).from_reader(Cursor::new(data));
        let mut seen: u64 = 0;

        while splitter.split_record().unwrap().is_some() {
            seen += 1;
        }

        seen
    }

    #[test]
    fn test_count() {
        // Empty input holds no record.
        assert_eq!(count_records("", 1024), 0);

        // Each input holds exactly three records, whatever the mix of
        // leading, trailing or repeated LF / CRLF terminators.
        let three_record_inputs = [
            "name\njohn\nlucy",
            "name\njohn\nlucy\n",
            "name\n\njohn\r\nlucy\n",
            "name\n\njohn\r\nlucy\n\n",
            "name\n\n\njohn\r\n\r\nlucy\n\n\n",
            "\nname\njohn\nlucy",
            "\n\nname\njohn\nlucy",
            "\r\n\r\nname\njohn\nlucy",
            "name\njohn\nlucy\r\n",
            "name\njohn\nlucy\r\n\r\n",
        ];

        // Tiny capacities force records to straddle buffer refills.
        for capacity in [32usize, 4, 3, 2, 1] {
            for input in &three_record_inputs {
                assert_eq!(
                    count_records(input, capacity),
                    3,
                    "capacity={} string={:?}",
                    capacity,
                    input
                );
            }
        }

        // Several comma-separated fields per record.
        let plain = "name,surname,age\njohn,landy,45\nlucy,rose,67";
        assert_eq!(count_records(plain, 1024), 3);
        assert_eq!(split_records(plain, 1024), 3);

        // Quoted fields holding delimiters and escaped quotes.
        let quoted = "name,surname,age\n\"john\",\"landy, the \"\"everlasting\"\" bastard\",45\nlucy,rose,\"67\"\njermaine,jackson,\"89\"\n\nkarine,loucan,\"52\"\r\n";

        for capacity in [1024usize, 32usize, 4, 3, 2, 1] {
            assert_eq!(count_records(quoted, capacity), 5, "capacity={}", capacity);
            assert_eq!(split_records(quoted, capacity), 5, "capacity={}", capacity);
        }

        // Tabs are ordinary bytes under the default configuration.
        let tabbed = "name\tsurname\tage\njohn\tlandy\t45\nlucy\trose\t67";
        assert_eq!(count_records(tabbed, 1024), 3);
        assert_eq!(split_records(tabbed, 1024), 3);
    }

    #[test]
    fn test_empty_row() -> error::Result<()> {
        // A row made only of an empty quoted field still counts as a record.
        let mut reader = Splitter::from_reader(Cursor::new("name\n\"\"\nlucy\n\"\""));

        assert_eq!(reader.count_records()?, 4);

        Ok(())
    }
}