1use crate::core::{CoreReader, ReadResult};
2use crate::records::{ByteRecord, ByteRecordBuilder};
3use crate::utils::trim_bom;
4
/// Configuration for building a [`TotalReader`].
///
/// Start from [`TotalReaderBuilder::new`] (or `Default::default()`), adjust
/// the settings with the chainable setters, then call
/// [`TotalReaderBuilder::from_bytes`] to obtain a reader.
///
/// The builder only holds plain bytes and a flag, so it is cheap to clone and
/// can be debug-printed when diagnosing a parsing configuration.
#[derive(Debug, Clone)]
pub struct TotalReaderBuilder {
    /// Byte separating fields within a record (`b','` by default).
    delimiter: u8,
    /// Byte used for quoting fields (`b'"'` by default).
    quote: u8,
    /// Whether the first record is treated as a header row (`true` by default).
    has_headers: bool,
}
11
12impl Default for TotalReaderBuilder {
13 fn default() -> Self {
14 Self {
15 delimiter: b',',
16 quote: b'"',
17 has_headers: true,
18 }
19 }
20}
21
22impl TotalReaderBuilder {
23 pub fn new() -> Self {
25 Self::default()
26 }
27
28 pub fn delimiter(&mut self, delimiter: u8) -> &mut Self {
34 self.delimiter = delimiter;
35 self
36 }
37
38 pub fn quote(&mut self, quote: u8) -> &mut Self {
44 self.quote = quote;
45 self
46 }
47
48 pub fn has_headers(&mut self, yes: bool) -> &mut Self {
52 self.has_headers = yes;
53 self
54 }
55
56 pub fn from_bytes<'b>(&self, bytes: &'b [u8]) -> TotalReader<'b> {
58 TotalReader {
59 inner: CoreReader::new(self.delimiter, self.quote),
60 bytes,
61 pos: 0,
62 headers: ByteRecord::new(),
63 has_read: false,
64 has_headers: self.has_headers,
65 }
66 }
67}
68
/// Reader over delimited records stored in a byte slice that is entirely
/// available in memory.
///
/// Created with [`TotalReader::from_bytes`] or
/// [`TotalReaderBuilder::from_bytes`].
pub struct TotalReader<'b> {
    /// Low-level parser, built from the configured delimiter and quote bytes.
    inner: CoreReader,
    /// The complete input being read.
    bytes: &'b [u8],
    /// Offset into `bytes` of the next byte to hand to the parser.
    pos: usize,
    /// First record of the input, captured during the first read
    /// (empty until then, or when the input holds no record).
    headers: ByteRecord,
    /// Whether the one-time first-read setup has already run.
    has_read: bool,
    /// Whether the first record is a header row, i.e. not yielded as data.
    has_headers: bool,
}
85
86impl<'b> TotalReader<'b> {
87 pub fn from_bytes(bytes: &'b [u8]) -> Self {
90 TotalReaderBuilder::new().from_bytes(bytes)
91 }
92
93 #[inline]
94 fn on_first_read(&mut self) {
95 if self.has_read {
96 return;
97 }
98
99 let bom_len = trim_bom(self.bytes);
101 self.pos += bom_len;
102
103 let mut headers = ByteRecord::new();
105
106 let has_data = self.read_byte_record_impl(&mut headers);
107
108 if has_data && !self.has_headers {
109 self.pos = bom_len;
110 }
111
112 self.headers = headers;
113 self.has_read = true;
114 }
115
116 #[inline]
119 pub fn byte_headers(&mut self) -> &ByteRecord {
120 self.on_first_read();
121
122 &self.headers
123 }
124
125 pub fn count_records(&mut self) -> u64 {
127 use ReadResult::*;
128
129 self.on_first_read();
130
131 let mut count: u64 = 0;
132
133 loop {
134 let (result, pos) = self.inner.split_record(&self.bytes[self.pos..]);
135
136 self.pos += pos;
137
138 match result {
139 End => break,
140 InputEmpty | Cr | Lf => continue,
141 Record => {
142 count += 1;
143 }
144 };
145 }
146
147 count.saturating_sub(if self.has_headers { 1 } else { 0 })
148 }
149
150 pub fn split_record(&mut self) -> Option<&[u8]> {
155 use ReadResult::*;
156
157 self.on_first_read();
158
159 let starting_pos = self.pos;
160
161 loop {
162 let (result, pos) = self.inner.split_record(&self.bytes[self.pos..]);
163
164 self.pos += pos;
165
166 match result {
167 End => return None,
168 InputEmpty | Cr | Lf => continue,
169 Record => return Some(&self.bytes[starting_pos..self.pos]),
170 }
171 }
172 }
173
174 fn read_byte_record_impl(&mut self, record: &mut ByteRecord) -> bool {
175 use ReadResult::*;
176
177 record.clear();
178
179 let mut record_builder = ByteRecordBuilder::wrap(record);
180
181 loop {
182 let (result, pos) = self
183 .inner
184 .read_record(&self.bytes[self.pos..], &mut record_builder);
185
186 self.pos += pos;
187
188 match result {
189 End => {
190 return false;
191 }
192 Cr | Lf | InputEmpty => {
193 continue;
194 }
195 Record => {
196 return true;
197 }
198 };
199 }
200 }
201
202 #[inline(always)]
207 pub fn read_byte_record(&mut self, record: &mut ByteRecord) -> bool {
208 self.on_first_read();
209 self.read_byte_record_impl(record)
210 }
211
212 #[inline(always)]
214 pub fn byte_records<'r>(&'r mut self) -> ByteRecordsIter<'r, 'b> {
215 ByteRecordsIter {
216 reader: self,
217 record: ByteRecord::new(),
218 }
219 }
220
221 #[inline(always)]
223 pub fn position(&self) -> u64 {
224 self.pos as u64
225 }
226}
227
/// Iterator over the records of a [`TotalReader`], created by
/// [`TotalReader::byte_records`].
pub struct ByteRecordsIter<'r, 'b> {
    /// Reader the records are pulled from, borrowed for the iterator's lifetime.
    reader: &'r mut TotalReader<'b>,
    /// Scratch record refilled on every step; items are clones of it.
    record: ByteRecord,
}
232
233impl Iterator for ByteRecordsIter<'_, '_> {
234 type Item = ByteRecord;
235
236 #[inline]
237 fn next(&mut self) -> Option<Self::Item> {
238 if self.reader.read_byte_record(&mut self.record) {
241 Some(self.record.clone())
242 } else {
243 None
244 }
245 }
246}
247
#[cfg(test)]
mod tests {
    use super::*;

    impl<'b> TotalReader<'b> {
        /// Test-only shortcut building a reader that treats the first record
        /// as data rather than as a header row.
        fn from_bytes_no_headers(bytes: &'b [u8]) -> Self {
            let mut builder = TotalReaderBuilder::new();
            builder.has_headers(false);
            builder.from_bytes(bytes)
        }
    }

    /// Number of records reported by `count_records` on a headerless reader.
    fn count_records(data: &str) -> u64 {
        TotalReader::from_bytes_no_headers(data.as_bytes()).count_records()
    }

    /// Number of records obtained by calling `split_record` until exhaustion
    /// on a headerless reader.
    fn split_records(data: &str) -> u64 {
        let mut reader = TotalReader::from_bytes_no_headers(data.as_bytes());

        std::iter::from_fn(|| reader.split_record().map(|_| 1u64)).sum()
    }

    #[test]
    fn test_count() {
        assert_eq!(count_records(""), 0);

        // Every input holds exactly three records; they differ only in the
        // placement of blank lines and in the line terminators used.
        let tests = [
            "name\njohn\nlucy",
            "name\njohn\nlucy\n",
            "name\n\njohn\r\nlucy\n",
            "name\n\njohn\r\nlucy\n\n",
            "name\n\n\njohn\r\n\r\nlucy\n\n\n",
            "\nname\njohn\nlucy",
            "\n\nname\njohn\nlucy",
            "\r\n\r\nname\njohn\nlucy",
            "name\njohn\nlucy\r\n",
            "name\njohn\nlucy\r\n\r\n",
        ];

        for test in &tests {
            assert_eq!(count_records(test), 3, "string={:?}", test);
            assert_eq!(split_records(test), 3, "string={:?}", test);
        }
    }

    #[test]
    fn test_byte_headers() {
        let data = b"name,surname\njohn,dandy";

        // With headers: headers requested before the first record.
        let mut reader = TotalReader::from_bytes(data);
        assert_eq!(reader.byte_headers(), &brec!["name", "surname"]);
        assert_eq!(
            reader.byte_records().next().unwrap(),
            brec!["john", "dandy"]
        );

        // With headers: headers requested after the first record.
        let mut reader = TotalReader::from_bytes(data);
        assert_eq!(
            reader.byte_records().next().unwrap(),
            brec!["john", "dandy"]
        );
        assert_eq!(reader.byte_headers(), &brec!["name", "surname"]);

        // Without headers: the first record is both the headers and data.
        let mut reader = TotalReader::from_bytes_no_headers(data);
        assert_eq!(reader.byte_headers(), &brec!["name", "surname"]);
        assert_eq!(
            reader.byte_records().next().unwrap(),
            brec!["name", "surname"]
        );

        // Without headers: same expectations in the reverse call order.
        let mut reader = TotalReader::from_bytes_no_headers(data);
        assert_eq!(
            reader.byte_records().next().unwrap(),
            brec!["name", "surname"]
        );
        assert_eq!(reader.byte_headers(), &brec!["name", "surname"]);

        // Empty input, with headers.
        let mut reader = TotalReader::from_bytes(b"");
        assert_eq!(reader.byte_headers(), &brec![]);
        assert!(reader.byte_records().next().is_none());

        // Empty input, without headers.
        let mut reader = TotalReader::from_bytes_no_headers(b"");
        assert_eq!(reader.byte_headers(), &brec![]);
        assert!(reader.byte_records().next().is_none());
    }
}