1use crate::core::{CoreReader, ReadResult};
2use crate::records::{ByteRecord, ByteRecordBuilder};
3use crate::utils::trim_bom;
4
/// Holds the settings used to create a [`TotalReader`].
///
/// The values set here are handed to the underlying `CoreReader` (delimiter
/// and quote bytes) and to the reader itself (header handling) when
/// `from_bytes` is called. The builder is plain data, so it can be cloned and
/// printed for diagnostics.
#[derive(Debug, Clone)]
pub struct TotalReaderBuilder {
    /// Delimiter byte passed to the low-level parser.
    delimiter: u8,
    /// Quote byte passed to the low-level parser.
    quote: u8,
    /// Whether the first record of the input is treated as a header row.
    has_headers: bool,
}
10
11impl Default for TotalReaderBuilder {
12 fn default() -> Self {
13 Self {
14 delimiter: b',',
15 quote: b'"',
16 has_headers: true,
17 }
18 }
19}
20
21impl TotalReaderBuilder {
22 pub fn new() -> Self {
23 Self::default()
24 }
25
26 pub fn delimiter(&mut self, delimiter: u8) -> &mut Self {
27 self.delimiter = delimiter;
28 self
29 }
30
31 pub fn quote(&mut self, quote: u8) -> &mut Self {
32 self.quote = quote;
33 self
34 }
35
36 pub fn has_headers(&mut self, yes: bool) -> &mut Self {
37 self.has_headers = yes;
38 self
39 }
40
41 pub fn from_bytes<'b>(&self, bytes: &'b [u8]) -> TotalReader<'b> {
42 TotalReader {
43 inner: CoreReader::new(self.delimiter, self.quote),
44 bytes,
45 pos: 0,
46 headers: ByteRecord::new(),
47 has_read: false,
48 has_headers: self.has_headers,
49 }
50 }
51}
52
53pub struct TotalReader<'b> {
56 inner: CoreReader,
57 bytes: &'b [u8],
58 pos: usize,
59 headers: ByteRecord,
60 has_read: bool,
61 has_headers: bool,
62}
63
64impl<'b> TotalReader<'b> {
65 pub fn from_bytes(bytes: &'b [u8]) -> Self {
66 TotalReaderBuilder::new().from_bytes(bytes)
67 }
68
69 #[inline]
70 fn on_first_read(&mut self) {
71 if self.has_read {
72 return;
73 }
74
75 let bom_len = trim_bom(self.bytes);
77 self.pos += bom_len;
78
79 let mut headers = ByteRecord::new();
81
82 let has_data = self.read_byte_record_impl(&mut headers);
83
84 if has_data && !self.has_headers {
85 self.pos = bom_len;
86 }
87
88 self.headers = headers;
89 self.has_read = true;
90 }
91
92 #[inline]
93 pub fn byte_headers(&mut self) -> &ByteRecord {
94 self.on_first_read();
95
96 &self.headers
97 }
98
99 pub fn count_records(&mut self) -> u64 {
100 use ReadResult::*;
101
102 self.on_first_read();
103
104 let mut count: u64 = 0;
105
106 loop {
107 let (result, pos) = self.inner.split_record(&self.bytes[self.pos..]);
108
109 self.pos += pos;
110
111 match result {
112 End => break,
113 InputEmpty | Cr | Lf => continue,
114 Record => {
115 count += 1;
116 }
117 };
118 }
119
120 count.saturating_sub(if self.has_headers { 1 } else { 0 })
121 }
122
123 pub fn split_record(&mut self) -> Option<&[u8]> {
124 use ReadResult::*;
125
126 self.on_first_read();
127
128 let starting_pos = self.pos;
129
130 loop {
131 let (result, pos) = self.inner.split_record(&self.bytes[self.pos..]);
132
133 self.pos += pos;
134
135 match result {
136 End => return None,
137 InputEmpty | Cr | Lf => continue,
138 Record => return Some(&self.bytes[starting_pos..self.pos]),
139 }
140 }
141 }
142
143 fn read_byte_record_impl(&mut self, record: &mut ByteRecord) -> bool {
144 use ReadResult::*;
145
146 record.clear();
147
148 let mut record_builder = ByteRecordBuilder::wrap(record);
149
150 loop {
151 let (result, pos) = self
152 .inner
153 .read_record(&self.bytes[self.pos..], &mut record_builder);
154
155 self.pos += pos;
156
157 match result {
158 End => {
159 return false;
160 }
161 Cr | Lf | InputEmpty => {
162 continue;
163 }
164 Record => {
165 return true;
166 }
167 };
168 }
169 }
170
171 #[inline(always)]
172 pub fn read_byte_record(&mut self, record: &mut ByteRecord) -> bool {
173 self.on_first_read();
174 self.read_byte_record_impl(record)
175 }
176
177 #[inline(always)]
178 pub fn byte_records<'r>(&'r mut self) -> ByteRecordsIter<'r, 'b> {
179 ByteRecordsIter {
180 reader: self,
181 record: ByteRecord::new(),
182 }
183 }
184}
185
186pub struct ByteRecordsIter<'r, 'b> {
187 reader: &'r mut TotalReader<'b>,
188 record: ByteRecord,
189}
190
191impl<'r, 'b> Iterator for ByteRecordsIter<'r, 'b> {
192 type Item = ByteRecord;
193
194 #[inline]
195 fn next(&mut self) -> Option<Self::Item> {
196 if self.reader.read_byte_record(&mut self.record) {
199 Some(self.record.clone())
200 } else {
201 None
202 }
203 }
204}
205
#[cfg(test)]
mod tests {
    use super::*;

    use crate::brec;

    impl<'b> TotalReader<'b> {
        /// Test-only shortcut: a reader over `bytes` with header handling
        /// turned off.
        fn from_bytes_no_headers(bytes: &'b [u8]) -> Self {
            let mut builder = TotalReaderBuilder::new();
            builder.has_headers(false);
            builder.from_bytes(bytes)
        }
    }

    /// Counts the records of `data` through `TotalReader::count_records`.
    fn count_records(data: &str) -> u64 {
        TotalReader::from_bytes_no_headers(data.as_bytes()).count_records()
    }

    /// Counts the records of `data` by calling `split_record` until it
    /// returns `None`.
    fn split_records(data: &str) -> u64 {
        let mut reader = TotalReader::from_bytes_no_headers(data.as_bytes());
        let mut seen: u64 = 0;

        while let Some(_record) = reader.split_record() {
            seen += 1;
        }

        seen
    }

    #[test]
    fn test_count() {
        // Empty input holds no record at all.
        assert_eq!(count_records(""), 0);

        // Every input below holds exactly three records, whatever the mix of
        // leading, trailing and repeated line terminators.
        let inputs = [
            "name\njohn\nlucy",
            "name\njohn\nlucy\n",
            "name\n\njohn\r\nlucy\n",
            "name\n\njohn\r\nlucy\n\n",
            "name\n\n\njohn\r\n\r\nlucy\n\n\n",
            "\nname\njohn\nlucy",
            "\n\nname\njohn\nlucy",
            "\r\n\r\nname\njohn\nlucy",
            "name\njohn\nlucy\r\n",
            "name\njohn\nlucy\r\n\r\n",
        ];

        for input in &inputs {
            assert_eq!(count_records(input), 3, "string={:?}", input);
            assert_eq!(split_records(input), 3, "string={:?}", input);
        }
    }

    #[test]
    fn test_byte_headers() {
        let data = b"name,surname\njohn,dandy";

        let header_row = brec!["name", "surname"];
        let data_row = brec!["john", "dandy"];

        // With headers, asking for the headers before reading.
        let mut reader = TotalReader::from_bytes(data);
        assert_eq!(reader.byte_headers(), &header_row);
        assert_eq!(reader.byte_records().next().unwrap(), data_row);

        // With headers, asking for the headers after reading.
        let mut reader = TotalReader::from_bytes(data);
        assert_eq!(reader.byte_records().next().unwrap(), data_row);
        assert_eq!(reader.byte_headers(), &header_row);

        // Without headers, the first row is both the headers and a record.
        let mut reader = TotalReader::from_bytes_no_headers(data);
        assert_eq!(reader.byte_headers(), &header_row);
        assert_eq!(reader.byte_records().next().unwrap(), header_row);

        // Without headers, asking for the headers after reading.
        let mut reader = TotalReader::from_bytes_no_headers(data);
        assert_eq!(reader.byte_records().next().unwrap(), header_row);
        assert_eq!(reader.byte_headers(), &header_row);

        // Empty input, with headers.
        let mut reader = TotalReader::from_bytes(b"");
        assert_eq!(reader.byte_headers(), &brec![]);
        assert!(reader.byte_records().next().is_none());

        // Empty input, without headers.
        let mut reader = TotalReader::from_bytes_no_headers(b"");
        assert_eq!(reader.byte_headers(), &brec![]);
        assert!(reader.byte_records().next().is_none());
    }
}