1use std::io::{BufRead, BufReader, Read};
2
3use crate::core::{CoreReader, ReadResult};
4use crate::error::{self, Error};
5use crate::records::{ByteRecord, ByteRecordBuilder};
6use crate::utils::trim_bom;
7
/// Configuration used to build a [`Reader`].
///
/// Each setting starts from the value given by the [`Default`] implementation
/// below and can be overridden before the reader is created.
pub struct ReaderBuilder {
    /// Byte separating two fields of a record.
    delimiter: u8,
    /// Byte used to quote fields.
    quote: u8,
    /// Capacity of the internal read buffer; `None` keeps `BufReader`'s default.
    buffer_capacity: Option<usize>,
    /// When `true`, records may have differing numbers of fields.
    flexible: bool,
    /// When `true`, the first record is kept as headers and not yielded as data.
    has_headers: bool,
}

impl Default for ReaderBuilder {
    /// Comma delimiter, double-quote quoting, strict field counts, headers
    /// expected, and no explicit buffer capacity.
    fn default() -> Self {
        ReaderBuilder {
            has_headers: true,
            flexible: false,
            buffer_capacity: None,
            quote: b'"',
            delimiter: b',',
        }
    }
}
27
28impl ReaderBuilder {
29 pub fn new() -> Self {
30 Self::default()
31 }
32
33 pub fn with_capacity(capacity: usize) -> Self {
34 let mut reader = Self::default();
35 reader.buffer_capacity(capacity);
36 reader
37 }
38
39 pub fn delimiter(&mut self, delimiter: u8) -> &mut Self {
40 self.delimiter = delimiter;
41 self
42 }
43
44 pub fn quote(&mut self, quote: u8) -> &mut Self {
45 self.quote = quote;
46 self
47 }
48
49 pub fn buffer_capacity(&mut self, capacity: usize) -> &mut Self {
50 self.buffer_capacity = Some(capacity);
51 self
52 }
53
54 pub fn flexible(&mut self, yes: bool) -> &mut Self {
55 self.flexible = yes;
56 self
57 }
58
59 pub fn has_headers(&mut self, yes: bool) -> &mut Self {
60 self.has_headers = yes;
61 self
62 }
63
64 fn bufreader<R: Read>(&self, reader: R) -> BufReader<R> {
65 match self.buffer_capacity {
66 None => BufReader::new(reader),
67 Some(capacity) => BufReader::with_capacity(capacity, reader),
68 }
69 }
70
71 pub fn from_reader<R: Read>(&self, reader: R) -> Reader<R> {
72 Reader {
73 buffer: self.bufreader(reader),
74 inner: CoreReader::new(self.delimiter, self.quote),
75 flexible: self.flexible,
76 headers: ByteRecord::new(),
77 has_read: false,
78 must_reemit_headers: !self.has_headers,
79 }
80 }
81}
82
83pub struct Reader<R> {
84 buffer: BufReader<R>,
85 inner: CoreReader,
86 flexible: bool,
87 headers: ByteRecord,
88 has_read: bool,
89 must_reemit_headers: bool,
90}
91
92impl<R: Read> Reader<R> {
93 pub fn from_reader(reader: R) -> Self {
94 ReaderBuilder::new().from_reader(reader)
95 }
96
97 #[inline]
98 fn check_field_count(&mut self, written: usize) -> error::Result<()> {
99 if self.flexible {
100 return Ok(());
101 }
102
103 if self.has_read && written != self.headers.len() {
104 return Err(Error::unequal_lengths(self.headers.len(), written));
105 }
106
107 Ok(())
108 }
109
110 fn read_byte_record_impl(&mut self, record: &mut ByteRecord) -> error::Result<bool> {
111 use ReadResult::*;
112
113 record.clear();
114
115 let mut record_builder = ByteRecordBuilder::wrap(record);
116
117 loop {
118 let input = self.buffer.fill_buf()?;
119
120 let (result, pos) = self.inner.read_record(input, &mut record_builder);
121
122 self.buffer.consume(pos);
123
124 match result {
125 End => {
126 return Ok(false);
127 }
128 Cr | Lf | InputEmpty => {
129 continue;
130 }
131 Record => {
132 self.check_field_count(record.len())?;
133 return Ok(true);
134 }
135 };
136 }
137 }
138
139 #[inline]
140 fn on_first_read(&mut self) -> error::Result<()> {
141 if self.has_read {
142 return Ok(());
143 }
144
145 let input = self.buffer.fill_buf()?;
147 let bom_len = trim_bom(input);
148 self.buffer.consume(bom_len);
149
150 let mut headers = ByteRecord::new();
152
153 let has_data = self.read_byte_record_impl(&mut headers)?;
154
155 if !has_data {
156 self.must_reemit_headers = false;
157 }
158
159 self.headers = headers;
160 self.has_read = true;
161
162 Ok(())
163 }
164
165 #[inline]
166 pub fn byte_headers(&mut self) -> error::Result<&ByteRecord> {
167 self.on_first_read()?;
168
169 Ok(&self.headers)
170 }
171
172 #[inline(always)]
173 pub fn read_byte_record(&mut self, record: &mut ByteRecord) -> error::Result<bool> {
174 self.on_first_read()?;
175
176 if self.must_reemit_headers {
177 self.headers.clone_into(record);
178 self.must_reemit_headers = false;
179 return Ok(true);
180 }
181
182 self.read_byte_record_impl(record)
183 }
184
185 pub fn byte_records(&mut self) -> ByteRecordsIter<'_, R> {
186 ByteRecordsIter {
187 reader: self,
188 record: ByteRecord::new(),
189 }
190 }
191
192 pub fn into_byte_records(self) -> ByteRecordsIntoIter<R> {
193 ByteRecordsIntoIter {
194 reader: self,
195 record: ByteRecord::new(),
196 }
197 }
198}
199
200pub struct ByteRecordsIter<'r, R> {
201 reader: &'r mut Reader<R>,
202 record: ByteRecord,
203}
204
205impl<'r, R: Read> Iterator for ByteRecordsIter<'r, R> {
206 type Item = error::Result<ByteRecord>;
207
208 #[inline]
209 fn next(&mut self) -> Option<Self::Item> {
210 match self.reader.read_byte_record(&mut self.record) {
213 Err(err) => Some(Err(err)),
214 Ok(true) => Some(Ok(self.record.clone())),
215 Ok(false) => None,
216 }
217 }
218}
219
220pub struct ByteRecordsIntoIter<R> {
221 reader: Reader<R>,
222 record: ByteRecord,
223}
224
225impl<R: Read> Iterator for ByteRecordsIntoIter<R> {
226 type Item = error::Result<ByteRecord>;
227
228 #[inline]
229 fn next(&mut self) -> Option<Self::Item> {
230 match self.reader.read_byte_record(&mut self.record) {
233 Err(err) => Some(Err(err)),
234 Ok(true) => Some(Ok(self.record.clone())),
235 Ok(false) => None,
236 }
237 }
238}
239
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use crate::brec;

    use super::*;

    impl<R: Read> Reader<R> {
        /// Test-only shortcut: a default reader that also yields the first record as data.
        fn from_reader_no_headers(reader: R) -> Self {
            let mut builder = ReaderBuilder::new();
            builder.has_headers(false);
            builder.from_reader(reader)
        }
    }

    #[test]
    fn test_read_byte_record() -> error::Result<()> {
        let csv = "name,surname,age\n\"john\",\"landy, the \"\"everlasting\"\" bastard\",45\n\"\"\"ok\"\"\",whatever,dude\nlucy,rose,\"67\"\njermaine,jackson,\"89\"\n\nkarine,loucan,\"52\"\nrose,\"glib\",12\n\"guillaume\",\"plique\",\"42\"\r\n";

        let expected = vec![
            brec!["name", "surname", "age"],
            brec!["john", "landy, the \"everlasting\" bastard", "45"],
            brec!["\"ok\"", "whatever", "dude"],
            brec!["lucy", "rose", "67"],
            brec!["jermaine", "jackson", "89"],
            brec!["karine", "loucan", "52"],
            brec!["rose", "glib", "12"],
            brec!["guillaume", "plique", "42"],
        ];

        // Small capacities make records span several buffer refills.
        for capacity in [32usize, 4, 3, 2, 1] {
            let mut builder = ReaderBuilder::with_capacity(capacity);
            builder.has_headers(false);

            let mut reader = builder.from_reader(Cursor::new(csv));
            let records = reader.byte_records().collect::<Result<Vec<_>, _>>()?;

            assert_eq!(records, expected);
        }

        Ok(())
    }

    #[test]
    fn test_strip_bom() -> error::Result<()> {
        // Input without a BOM is left untouched.
        let mut plain = Reader::from_reader_no_headers(Cursor::new("name,surname,age"));
        let first = plain.byte_records().next().unwrap()?;

        assert_eq!(first, brec!["name", "surname", "age"]);

        // A leading UTF-8 BOM must not leak into the first field.
        let mut with_bom =
            Reader::from_reader_no_headers(Cursor::new(b"\xef\xbb\xbfname,surname,age"));
        let first = with_bom.byte_records().next().unwrap()?;

        assert_eq!(first, brec!["name", "surname", "age"]);

        Ok(())
    }

    #[test]
    fn test_empty_row() -> error::Result<()> {
        let data = "name\n\"\"\nlucy\n\"\"";
        let expected = vec![brec!["name"], brec![""], brec!["lucy"], brec![""]];

        let records = Reader::from_reader_no_headers(Cursor::new(data))
            .into_byte_records()
            .collect::<Result<Vec<_>, _>>()?;

        assert_eq!(records, expected);

        Ok(())
    }

    #[test]
    fn test_crlf() -> error::Result<()> {
        let data = "name,surname\r\nlucy,\"john\"\r\nevan,zhong\r\nbéatrice,glougou\r\n";
        let expected = vec![
            brec!["name", "surname"],
            brec!["lucy", "john"],
            brec!["evan", "zhong"],
            brec!["béatrice", "glougou"],
        ];

        let records = Reader::from_reader_no_headers(Cursor::new(data))
            .into_byte_records()
            .collect::<Result<Vec<_>, _>>()?;

        assert_eq!(records, expected);

        Ok(())
    }

    #[test]
    fn test_quote_always() -> error::Result<()> {
        let data = "\"name\",\"surname\"\n\"lucy\",\"rose\"\n\"john\",\"mayhew\"";
        let expected = vec![
            brec!["name", "surname"],
            brec!["lucy", "rose"],
            brec!["john", "mayhew"],
        ];

        let records = Reader::from_reader_no_headers(Cursor::new(data))
            .into_byte_records()
            .collect::<Result<Vec<_>, _>>()?;

        assert_eq!(records, expected);

        Ok(())
    }

    #[test]
    fn test_byte_headers() -> error::Result<()> {
        let data = b"name,surname\njohn,dandy";

        // With headers: headers requested before the first record.
        let mut reader = Reader::from_reader(Cursor::new(data));
        assert_eq!(reader.byte_headers()?, &brec!["name", "surname"]);
        let first = reader.byte_records().next().unwrap()?;
        assert_eq!(first, brec!["john", "dandy"]);

        // With headers: headers requested after the first record.
        let mut reader = Reader::from_reader(Cursor::new(data));
        let first = reader.byte_records().next().unwrap()?;
        assert_eq!(first, brec!["john", "dandy"]);
        assert_eq!(reader.byte_headers()?, &brec!["name", "surname"]);

        // Without headers: the first record is both the headers and a data record.
        let mut reader = Reader::from_reader_no_headers(Cursor::new(data));
        assert_eq!(reader.byte_headers()?, &brec!["name", "surname"]);
        let first = reader.byte_records().next().unwrap()?;
        assert_eq!(first, brec!["name", "surname"]);

        // Without headers: same expectations when records are read first.
        let mut reader = Reader::from_reader_no_headers(Cursor::new(data));
        let first = reader.byte_records().next().unwrap()?;
        assert_eq!(first, brec!["name", "surname"]);
        assert_eq!(reader.byte_headers()?, &brec!["name", "surname"]);

        // Empty input, with headers.
        let mut reader = Reader::from_reader(Cursor::new(b""));
        assert_eq!(reader.byte_headers()?, &brec![]);
        assert!(reader.byte_records().next().is_none());

        // Empty input, without headers.
        let mut reader = Reader::from_reader_no_headers(Cursor::new(b""));
        assert_eq!(reader.byte_headers()?, &brec![]);
        assert!(reader.byte_records().next().is_none());

        Ok(())
    }

    #[test]
    fn test_weirdness() -> error::Result<()> {
        // Quotes appearing in unexpected places within fields.
        let data =
            b"name,surname\n\"test\" \"wat\", ok\ntest \"wat\",ok \ntest,\"whatever\" ok\n\"test\" there,\"ok\"\r\n";
        let expected = vec![
            brec!["name", "surname"],
            brec!["test \"wat", " ok"],
            brec!["test \"wat", "ok "],
            brec!["test", "whatever ok"],
            brec!["test there", "ok"],
        ];

        let mut reader = Reader::from_reader_no_headers(Cursor::new(data));
        let records = reader.byte_records().collect::<Result<Vec<_>, _>>()?;

        assert_eq!(records, expected);

        // Stray carriage returns between two records.
        let data = b"name,surname\n\r\rjohn,coucou";
        let expected = vec![brec!["name", "surname"], brec!["john", "coucou"]];

        let mut reader = Reader::from_reader_no_headers(Cursor::new(data));
        let records = reader.byte_records().collect::<Result<Vec<_>, _>>()?;

        assert_eq!(records, expected);

        Ok(())
    }
}