1use crate::core::{CoreReader, ReadResult};
2use crate::records::{ByteRecord, ByteRecordBuilder};
3use crate::utils::trim_bom;
4
/// Configuration used to construct a [`TotalReader`] over an in-memory byte
/// slice.
///
/// Obtain one through [`TotalReaderBuilder::new`] (or `Default`), adjust it
/// with the chainable setters, then call `from_bytes`.
pub struct TotalReaderBuilder {
    /// Byte forwarded to `CoreReader::new` as the field delimiter.
    delimiter: u8,
    /// Byte forwarded to `CoreReader::new` as the quote character.
    quote: u8,
    /// Whether the first record of the input is consumed as a header row
    /// instead of being yielded as data.
    has_headers: bool,
}
10
11impl Default for TotalReaderBuilder {
12 fn default() -> Self {
13 Self {
14 delimiter: b',',
15 quote: b'"',
16 has_headers: true,
17 }
18 }
19}
20
21impl TotalReaderBuilder {
22 pub fn new() -> Self {
23 Self::default()
24 }
25
26 pub fn delimiter(&mut self, delimiter: u8) -> &mut Self {
27 self.delimiter = delimiter;
28 self
29 }
30
31 pub fn quote(&mut self, quote: u8) -> &mut Self {
32 self.quote = quote;
33 self
34 }
35
36 pub fn has_headers(&mut self, yes: bool) -> &mut Self {
37 self.has_headers = yes;
38 self
39 }
40
41 pub fn from_bytes<'b>(&self, bytes: &'b [u8]) -> TotalReader<'b> {
42 TotalReader {
43 inner: CoreReader::new(self.delimiter, self.quote),
44 bytes,
45 pos: 0,
46 headers: ByteRecord::new(),
47 has_read: false,
48 has_headers: self.has_headers,
49 }
50 }
51}
52
53pub struct TotalReader<'b> {
56 inner: CoreReader,
57 bytes: &'b [u8],
58 pos: usize,
59 headers: ByteRecord,
60 has_read: bool,
61 has_headers: bool,
62}
63
64impl<'b> TotalReader<'b> {
65 pub fn from_bytes(bytes: &'b [u8]) -> Self {
66 TotalReaderBuilder::new().from_bytes(bytes)
67 }
68
69 #[inline]
70 fn on_first_read(&mut self) {
71 if self.has_read {
72 return;
73 }
74
75 let bom_len = trim_bom(self.bytes);
77 self.pos += bom_len;
78
79 let mut headers = ByteRecord::new();
81
82 let has_data = self.read_byte_record_impl(&mut headers);
83
84 if has_data && !self.has_headers {
85 self.pos = bom_len;
86 }
87
88 self.headers = headers;
89 self.has_read = true;
90 }
91
92 #[inline]
93 pub fn byte_headers(&mut self) -> &ByteRecord {
94 self.on_first_read();
95
96 &self.headers
97 }
98
99 pub fn count_records(&mut self) -> u64 {
100 use ReadResult::*;
101
102 self.on_first_read();
103
104 let mut count: u64 = 0;
105
106 loop {
107 let (result, pos) = self.inner.split_record(&self.bytes[self.pos..]);
108
109 self.pos += pos;
110
111 match result {
112 End => break,
113 InputEmpty | Cr | Lf => continue,
114 Record => {
115 count += 1;
116 }
117 };
118 }
119
120 count.saturating_sub(if self.has_headers { 1 } else { 0 })
121 }
122
123 pub fn split_record(&mut self) -> Option<&[u8]> {
124 use ReadResult::*;
125
126 self.on_first_read();
127
128 let starting_pos = self.pos;
129
130 loop {
131 let (result, pos) = self.inner.split_record(&self.bytes[self.pos..]);
132
133 self.pos += pos;
134
135 match result {
136 End => return None,
137 InputEmpty | Cr | Lf => continue,
138 Record => return Some(&self.bytes[starting_pos..self.pos]),
139 }
140 }
141 }
142
143 fn read_byte_record_impl(&mut self, record: &mut ByteRecord) -> bool {
144 use ReadResult::*;
145
146 record.clear();
147
148 let mut record_builder = ByteRecordBuilder::wrap(record);
149
150 loop {
151 let (result, pos) = self
152 .inner
153 .read_record(&self.bytes[self.pos..], &mut record_builder);
154
155 self.pos += pos;
156
157 match result {
158 End => {
159 return false;
160 }
161 Cr | Lf | InputEmpty => {
162 continue;
163 }
164 Record => {
165 return true;
166 }
167 };
168 }
169 }
170
171 #[inline(always)]
172 pub fn read_byte_record(&mut self, record: &mut ByteRecord) -> bool {
173 self.on_first_read();
174 self.read_byte_record_impl(record)
175 }
176
177 #[inline(always)]
178 pub fn byte_records<'r>(&'r mut self) -> ByteRecordsIter<'r, 'b> {
179 ByteRecordsIter {
180 reader: self,
181 record: ByteRecord::new(),
182 }
183 }
184
185 #[inline(always)]
186 pub fn position(&self) -> u64 {
187 self.pos as u64
188 }
189}
190
191pub struct ByteRecordsIter<'r, 'b> {
192 reader: &'r mut TotalReader<'b>,
193 record: ByteRecord,
194}
195
196impl Iterator for ByteRecordsIter<'_, '_> {
197 type Item = ByteRecord;
198
199 #[inline]
200 fn next(&mut self) -> Option<Self::Item> {
201 if self.reader.read_byte_record(&mut self.record) {
204 Some(self.record.clone())
205 } else {
206 None
207 }
208 }
209}
210
#[cfg(test)]
mod tests {
    use super::*;

    use crate::brec;

    impl<'b> TotalReader<'b> {
        /// Test-only constructor: a reader whose first record is data.
        fn from_bytes_no_headers(bytes: &'b [u8]) -> Self {
            let mut builder = TotalReaderBuilder::new();
            builder.has_headers(false);
            builder.from_bytes(bytes)
        }
    }

    /// Counts the records of `data` through `TotalReader::count_records`,
    /// with headers disabled.
    fn count_records(data: &str) -> u64 {
        TotalReader::from_bytes_no_headers(data.as_bytes()).count_records()
    }

    /// Counts the records of `data` by repeatedly calling
    /// `TotalReader::split_record`, with headers disabled.
    fn split_records(data: &str) -> u64 {
        let mut reader = TotalReader::from_bytes_no_headers(data.as_bytes());
        let mut seen: u64 = 0;

        while let Some(_record) = reader.split_record() {
            seen += 1;
        }

        seen
    }

    #[test]
    fn test_count() {
        assert_eq!(count_records(""), 0);

        // Every input holds the same three records; only the blank lines and
        // line terminators around them differ.
        let inputs = [
            "name\njohn\nlucy",
            "name\njohn\nlucy\n",
            "name\n\njohn\r\nlucy\n",
            "name\n\njohn\r\nlucy\n\n",
            "name\n\n\njohn\r\n\r\nlucy\n\n\n",
            "\nname\njohn\nlucy",
            "\n\nname\njohn\nlucy",
            "\r\n\r\nname\njohn\nlucy",
            "name\njohn\nlucy\r\n",
            "name\njohn\nlucy\r\n\r\n",
        ];

        for input in inputs.iter() {
            assert_eq!(count_records(input), 3, "string={:?}", input);
            assert_eq!(split_records(input), 3, "string={:?}", input);
        }
    }

    #[test]
    fn test_byte_headers() {
        let data = b"name,surname\njohn,dandy";

        let first_row = brec!["name", "surname"];
        let second_row = brec!["john", "dandy"];

        // Headers enabled, headers requested before iterating.
        {
            let mut reader = TotalReader::from_bytes(data);
            assert_eq!(reader.byte_headers(), &first_row);
            assert_eq!(reader.byte_records().next().unwrap(), second_row);
        }

        // Headers enabled, headers requested after iterating.
        {
            let mut reader = TotalReader::from_bytes(data);
            assert_eq!(reader.byte_records().next().unwrap(), second_row);
            assert_eq!(reader.byte_headers(), &first_row);
        }

        // Headers disabled: the first row is both the headers and a record.
        {
            let mut reader = TotalReader::from_bytes_no_headers(data);
            assert_eq!(reader.byte_headers(), &first_row);
            assert_eq!(reader.byte_records().next().unwrap(), first_row);
        }

        {
            let mut reader = TotalReader::from_bytes_no_headers(data);
            assert_eq!(reader.byte_records().next().unwrap(), first_row);
            assert_eq!(reader.byte_headers(), &first_row);
        }

        // Empty input: empty headers and no records, with or without headers.
        {
            let mut reader = TotalReader::from_bytes(b"");
            assert_eq!(reader.byte_headers(), &brec![]);
            assert!(reader.byte_records().next().is_none());
        }

        {
            let mut reader = TotalReader::from_bytes_no_headers(b"");
            assert_eq!(reader.byte_headers(), &brec![]);
            assert!(reader.byte_records().next().is_none());
        }
    }
}