1use crate::core::{CoreReader, ReadResult};
2use crate::records::{ByteRecord, ByteRecordBuilder};
3use crate::utils::trim_bom;
4
/// Configuration used to build a [`TotalReader`] over an in-memory byte slice.
///
/// All fields are plain `u8`/`bool` values, so the builder derives `Debug`
/// and `Clone`: it can be logged and duplicated cheaply.
#[derive(Debug, Clone)]
pub struct TotalReaderBuilder {
    /// Byte separating fields within a record.
    delimiter: u8,
    /// Byte used to quote fields.
    quote: u8,
    /// Whether the first record is treated as a header row rather than data.
    has_headers: bool,
}
11
12impl Default for TotalReaderBuilder {
13 fn default() -> Self {
14 Self {
15 delimiter: b',',
16 quote: b'"',
17 has_headers: true,
18 }
19 }
20}
21
22impl TotalReaderBuilder {
23 pub fn new() -> Self {
24 Self::default()
25 }
26
27 pub fn delimiter(&mut self, delimiter: u8) -> &mut Self {
28 self.delimiter = delimiter;
29 self
30 }
31
32 pub fn quote(&mut self, quote: u8) -> &mut Self {
33 self.quote = quote;
34 self
35 }
36
37 pub fn has_headers(&mut self, yes: bool) -> &mut Self {
38 self.has_headers = yes;
39 self
40 }
41
42 pub fn from_bytes<'b>(&self, bytes: &'b [u8]) -> TotalReader<'b> {
43 TotalReader {
44 inner: CoreReader::new(self.delimiter, self.quote),
45 bytes,
46 pos: 0,
47 headers: ByteRecord::new(),
48 has_read: false,
49 has_headers: self.has_headers,
50 }
51 }
52}
53
54pub struct TotalReader<'b> {
63 inner: CoreReader,
64 bytes: &'b [u8],
65 pos: usize,
66 headers: ByteRecord,
67 has_read: bool,
68 has_headers: bool,
69}
70
71impl<'b> TotalReader<'b> {
72 pub fn from_bytes(bytes: &'b [u8]) -> Self {
73 TotalReaderBuilder::new().from_bytes(bytes)
74 }
75
76 #[inline]
77 fn on_first_read(&mut self) {
78 if self.has_read {
79 return;
80 }
81
82 let bom_len = trim_bom(self.bytes);
84 self.pos += bom_len;
85
86 let mut headers = ByteRecord::new();
88
89 let has_data = self.read_byte_record_impl(&mut headers);
90
91 if has_data && !self.has_headers {
92 self.pos = bom_len;
93 }
94
95 self.headers = headers;
96 self.has_read = true;
97 }
98
99 #[inline]
102 pub fn byte_headers(&mut self) -> &ByteRecord {
103 self.on_first_read();
104
105 &self.headers
106 }
107
108 pub fn count_records(&mut self) -> u64 {
109 use ReadResult::*;
110
111 self.on_first_read();
112
113 let mut count: u64 = 0;
114
115 loop {
116 let (result, pos) = self.inner.split_record(&self.bytes[self.pos..]);
117
118 self.pos += pos;
119
120 match result {
121 End => break,
122 InputEmpty | Cr | Lf => continue,
123 Record => {
124 count += 1;
125 }
126 };
127 }
128
129 count.saturating_sub(if self.has_headers { 1 } else { 0 })
130 }
131
132 pub fn split_record(&mut self) -> Option<&[u8]> {
133 use ReadResult::*;
134
135 self.on_first_read();
136
137 let starting_pos = self.pos;
138
139 loop {
140 let (result, pos) = self.inner.split_record(&self.bytes[self.pos..]);
141
142 self.pos += pos;
143
144 match result {
145 End => return None,
146 InputEmpty | Cr | Lf => continue,
147 Record => return Some(&self.bytes[starting_pos..self.pos]),
148 }
149 }
150 }
151
152 fn read_byte_record_impl(&mut self, record: &mut ByteRecord) -> bool {
153 use ReadResult::*;
154
155 record.clear();
156
157 let mut record_builder = ByteRecordBuilder::wrap(record);
158
159 loop {
160 let (result, pos) = self
161 .inner
162 .read_record(&self.bytes[self.pos..], &mut record_builder);
163
164 self.pos += pos;
165
166 match result {
167 End => {
168 return false;
169 }
170 Cr | Lf | InputEmpty => {
171 continue;
172 }
173 Record => {
174 return true;
175 }
176 };
177 }
178 }
179
180 #[inline(always)]
181 pub fn read_byte_record(&mut self, record: &mut ByteRecord) -> bool {
182 self.on_first_read();
183 self.read_byte_record_impl(record)
184 }
185
186 #[inline(always)]
187 pub fn byte_records<'r>(&'r mut self) -> ByteRecordsIter<'r, 'b> {
188 ByteRecordsIter {
189 reader: self,
190 record: ByteRecord::new(),
191 }
192 }
193
194 #[inline(always)]
195 pub fn position(&self) -> u64 {
196 self.pos as u64
197 }
198}
199
200pub struct ByteRecordsIter<'r, 'b> {
201 reader: &'r mut TotalReader<'b>,
202 record: ByteRecord,
203}
204
205impl Iterator for ByteRecordsIter<'_, '_> {
206 type Item = ByteRecord;
207
208 #[inline]
209 fn next(&mut self) -> Option<Self::Item> {
210 if self.reader.read_byte_record(&mut self.record) {
213 Some(self.record.clone())
214 } else {
215 None
216 }
217 }
218}
219
#[cfg(test)]
mod tests {
    use super::*;

    impl<'b> TotalReader<'b> {
        /// Test-only constructor: default settings, except that the first
        /// record is treated as data instead of a header row.
        fn from_bytes_no_headers(bytes: &'b [u8]) -> Self {
            let mut builder = TotalReaderBuilder::new();
            builder.has_headers(false);
            builder.from_bytes(bytes)
        }
    }

    /// Counts the records of `data` through `count_records` on a header-less
    /// reader.
    fn count_records(data: &str) -> u64 {
        TotalReader::from_bytes_no_headers(data.as_bytes()).count_records()
    }

    /// Counts the records of `data` by draining `split_record` on a
    /// header-less reader.
    fn split_records(data: &str) -> u64 {
        let mut reader = TotalReader::from_bytes_no_headers(data.as_bytes());
        let mut seen: u64 = 0;

        loop {
            if reader.split_record().is_none() {
                break;
            }

            seen += 1;
        }

        seen
    }

    #[test]
    fn test_count() {
        assert_eq!(count_records(""), 0);

        // Each input is expected to hold exactly three records, whatever
        // mix of blank lines and line endings surrounds them.
        let cases: [&str; 10] = [
            "name\njohn\nlucy",
            "name\njohn\nlucy\n",
            "name\n\njohn\r\nlucy\n",
            "name\n\njohn\r\nlucy\n\n",
            "name\n\n\njohn\r\n\r\nlucy\n\n\n",
            "\nname\njohn\nlucy",
            "\n\nname\njohn\nlucy",
            "\r\n\r\nname\njohn\nlucy",
            "name\njohn\nlucy\r\n",
            "name\njohn\nlucy\r\n\r\n",
        ];

        for case in cases.iter() {
            assert_eq!(count_records(case), 3, "string={:?}", case);
            assert_eq!(split_records(case), 3, "string={:?}", case);
        }
    }

    #[test]
    fn test_byte_headers() {
        let data = b"name,surname\njohn,dandy";
        let first_row = brec!["name", "surname"];
        let second_row = brec!["john", "dandy"];

        // Headers enabled, headers requested before the first record.
        let mut reader = TotalReader::from_bytes(data);
        assert_eq!(reader.byte_headers(), &first_row);
        assert_eq!(reader.byte_records().next().unwrap(), second_row);

        // Headers enabled, first record requested before the headers.
        let mut reader = TotalReader::from_bytes(data);
        assert_eq!(reader.byte_records().next().unwrap(), second_row);
        assert_eq!(reader.byte_headers(), &first_row);

        // Headers disabled: the first row is both the "headers" and the
        // first data record.
        let mut reader = TotalReader::from_bytes_no_headers(data);
        assert_eq!(reader.byte_headers(), &first_row);
        assert_eq!(reader.byte_records().next().unwrap(), first_row);

        // Headers disabled, first record requested before the headers.
        let mut reader = TotalReader::from_bytes_no_headers(data);
        assert_eq!(reader.byte_records().next().unwrap(), first_row);
        assert_eq!(reader.byte_headers(), &first_row);

        // Empty input, headers enabled.
        let mut reader = TotalReader::from_bytes(b"");
        assert_eq!(reader.byte_headers(), &brec![]);
        assert!(reader.byte_records().next().is_none());

        // Empty input, headers disabled.
        let mut reader = TotalReader::from_bytes_no_headers(b"");
        assert_eq!(reader.byte_headers(), &brec![]);
        assert!(reader.byte_records().next().is_none());
    }
}