1extern crate csv;
2
3use std::io::Read;
4use csv::ReaderBuilder;
5
6#[cfg(test)]
7use std::io::BufReader;
8
/// In-memory stand-in for a real input stream, used only by the unit tests.
///
/// It hands out the bytes of `src` sequentially and can optionally cap how
/// many bytes a single `read` call may return.
#[cfg(test)]
struct FakeCsvReader {
    /// Full text the reader will serve.
    src: String,
    /// Offset into `src` of the next byte to hand out.
    pos: usize,
    /// Upper bound on bytes returned per `read` call; `None` means no cap.
    max_read: Option<usize>,
}
15
16
#[cfg(test)]
impl FakeCsvReader {
    /// Builds a reader over `strng` that returns at most `size` bytes per
    /// `read` call.
    pub fn new_by_size(strng: String, size: usize) -> FakeCsvReader {
        FakeCsvReader {
            max_read: Some(size),
            ..FakeCsvReader::new(strng)
        }
    }

    /// Builds a reader over `strng` with no per-call size cap.
    pub fn new(strng: String) -> FakeCsvReader {
        FakeCsvReader {
            src: strng,
            pos: 0,
            max_read: None,
        }
    }
}
34
35
#[cfg(test)]
impl Read for FakeCsvReader {
    /// Copies the next slice of `src` into `buf`.
    ///
    /// The number of bytes returned is the smallest of: the bytes still
    /// unread, the size of `buf`, and `max_read` when one is set. Returns
    /// `Ok(0)` once that number is zero.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
        let served = {
            let remaining = &self.src.as_bytes()[self.pos..];
            let fits = remaining.len().min(buf.len());
            let served = self.max_read.map_or(fits, |cap| cap.min(fits));
            buf[..served].copy_from_slice(&remaining[..served]);
            served
        };

        self.pos += served;
        Ok(served)
    }
}
66
67
/// The fake reader, wrapped in a `BufReader`, yields exactly its source text.
#[test]
fn fake_reader_works() {
    let expected = "hi there";
    let mut reader = BufReader::new(FakeCsvReader::new(expected.to_string()));
    let mut contents = String::new();

    match reader.read_to_string(&mut contents) {
        Err(e) => println!("ERROR: {}", e),
        Ok(r) => println!("RESULT: {}: {}", r, contents),
    }

    assert_eq!(contents, expected.to_string());
}
84
85
/// Accumulator used while scanning lines for the one with the most fields.
struct BufferAcc {
    /// Index of the line that will be examined next.
    current_line: usize,
    /// Index of the first line that reached the highest field count so far.
    max_line: usize,
    /// Highest field count seen so far.
    count: usize,
}
91
92impl std::fmt::Debug for BufferAcc {
93 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
94 write!(
95 f,
96 "BufferAcc({},{},{})",
97 self.count,
98 self.max_line,
99 self.current_line,
100 )
101 }
102}
103
/// Returns `true` when `c` is a carriage return (13) or a line feed (10).
fn is_nl(c: u8) -> bool {
    c == b'\r' || c == b'\n'
}
110
111
112fn has_nl(input: &[u8]) -> bool {
113 let mut last: u8 = 0;
114 for inp in input {
115 if is_nl(last) {
116 return true;
117 }
118 last = *inp;
119 }
120 false
121}
122
123
/// Test helper: turns an owned string into its UTF-8 bytes.
#[cfg(test)]
fn str_to_vec(s: String) -> Vec<u8> {
    s.into_bytes()
}
133
134
/// `has_nl` only reports a newline when at least one byte follows it.
#[test]
fn test_has_nl() {
    let cases: [(&str, bool); 5] = [
        ("hi there\r\n", true),
        ("hi there\nbob", true),
        ("hi there\r", false),
        ("hi there\n", false),
        ("hi there", false),
    ];

    for &(text, expected) in cases.iter() {
        let bytes = str_to_vec(text.to_string());
        assert_eq!(has_nl(&bytes), expected);
    }
}
143
144
145fn fill<R>(mut rdr: R, v: &mut Vec<u8>) -> Result<(), std::io::Error> where R: Read {
148 let mut number_of_bytes_read = 999;
149 while (number_of_bytes_read > 0) && (!has_nl(&v)) {
150 #[cfg(test)]
151 let mut bytes = [0; 8];
152 #[cfg(not(test))]
153 let mut bytes = [0; 8192];
154 number_of_bytes_read = rdr.read(&mut bytes)?;
155 v.append(&mut bytes[0..number_of_bytes_read].to_vec());
156 }
157 Result::Ok(())
158}
159
160
/// `fill` stops as soon as the buffer holds a newline followed by more data,
/// leaving the remainder of the source unread.
#[test]
fn test_fill() {
    let source = ["full of trash", "but before the real data", "a"].join("\n");
    let mut reader = FakeCsvReader::new(source);
    let mut pending = b"This is a header ".to_vec();

    fill(&mut reader, &mut pending).unwrap();

    let mut remainder = String::new();
    reader.read_to_string(&mut remainder).unwrap();

    assert_eq!(remainder, "t before the real data\na");
    assert_eq!(pending, b"This is a header full of trash\nbu".to_vec());
}
179
180
181fn get_line(unprocessed: &mut Vec<u8>) -> Vec<u8> {
182
183 let get_byte_count = || {
184 let i = 0;
185 for i in 1..unprocessed.len() {
186 match (is_nl(unprocessed[i - 1]), is_nl(unprocessed[i])) {
187 (true, true) => return i + 1,
188 (true, false) => return i,
189 (false, _) => {}
190 }
191 }
192 i
193 };
194
195 let byte_count = get_byte_count();
196
197 let r = unprocessed[..byte_count].to_vec();
198 unprocessed.drain(..byte_count);
199 r
200
201}
202
/// `get_line` peels off one terminated line at a time and leaves the rest.
#[test]
fn test_get_line() {
    // (input, expected line, expected remainder)
    let cases: [(&str, &str, &str); 5] = [
        ("hi there\r\n", "hi there\r\n", ""),
        ("hi there\nhow are you bob?", "hi there\n", "how are you bob?"),
        ("\rhi there", "\r", "hi there"),
        ("hi there", "", "hi there"),
        ("", "", ""),
    ];

    for &(input, line, rest) in cases.iter() {
        let mut pending = str_to_vec(input.to_string());
        let extracted = get_line(&mut pending);
        assert_eq!(extracted, str_to_vec(line.to_string()));
        assert_eq!(pending, str_to_vec(rest.to_string()));
    }
}
230
231
232fn count_seperators(field_seperator: u8, line: &[u8]) -> usize {
233
234 let mut rdr = ReaderBuilder::new()
235 .delimiter(field_seperator)
236 .has_headers(false)
237 .from_reader(line);
238
239 match rdr.byte_records().next() {
240 Some(rec) => {
241 rec.unwrap_or_default().len()
242 }
243 None => 0
244 }
245
246}
247
248
/// A comma-delimited line with four values is reported as four fields.
#[test]
fn test_count_seperators() {
    let line = str_to_vec("This,has,4,fields".to_string());
    let fields = count_seperators(b',', &line);
    assert_eq!(fields, 4);
}
256
257
/// A queue of byte chunks, served front to back.
type Buffer = Vec<Vec<u8>>;

/// A `Read` adapter that inspects the first lines of a stream and drops
/// every line that comes before the first one with the most fields.
pub struct Blade<R: Read> {
    /// Underlying source of bytes.
    rdr: R,
    /// Delimiter byte used when counting fields in a line.
    field_seperator: u8,
    /// Inspected lines that were kept; served before anything else.
    buffer: Buffer,
    /// Bytes already pulled from `rdr` but not split into lines; served after
    /// `buffer` and before reading from `rdr` again.
    unprocessed: Buffer,
    /// Whether the initial inspection has already run.
    prepared: bool,
    /// Maximum number of leading lines to inspect.
    consider_lines: usize,
}
268
269
270fn read_from_buffer(src_buffer: &mut Buffer, return_buf: &mut [u8]) -> Result<usize, std::io::Error> {
273
274 if src_buffer.is_empty() {
275 return Result::Ok(0);
276 }
277
278 let mut count = src_buffer[0].len();
279 let mut shift = true;
280 let as_bytes = src_buffer.remove(0);
281
282 if return_buf.len() < as_bytes.len() {
283 count = return_buf.len();
284 shift = false;
285 }
286
287 return_buf[..count].clone_from_slice(&as_bytes[..count]);
288
289 if !shift {
290 src_buffer.insert(0, as_bytes[count..].to_vec());
291 }
292
293 Result::Ok(count)
294}
295
296
/// A queue holding only an empty chunk yields zero bytes and ends up empty.
#[test]
fn test_read_from_buffer_empty() {
    let mut out = [0u8; 4];
    let mut chunks: Buffer = vec![Vec::new()];

    let read = read_from_buffer(&mut chunks, &mut out).unwrap_or_default();

    assert_eq!(read, 0);
    assert_eq!(out, [0u8; 4]);
    assert_eq!(chunks, Buffer::new());
}
309
310
/// A chunk that exactly fits the output is delivered whole and dequeued.
#[test]
fn test_read_from_buffer_full_line() {
    let mut out = [0u8; 4];
    let mut chunks: Buffer = vec![vec![1, 2, 3, 4], vec![5, 6, 7, 8], vec![9]];

    let read = read_from_buffer(&mut chunks, &mut out).unwrap_or_default();

    assert_eq!(read, 4);
    assert_eq!(out, [1, 2, 3, 4]);
    let remaining: Buffer = vec![vec![5, 6, 7, 8], vec![9]];
    assert_eq!(chunks, remaining);
}
322
323
/// A chunk larger than the output is delivered in part; its tail stays queued.
#[test]
fn test_read_from_buffer_partial_line() {
    let mut out = [0u8; 8];
    let mut chunks: Buffer = vec![(1..15).collect()];

    let read = read_from_buffer(&mut chunks, &mut out).unwrap_or_default();

    assert_eq!(read, 8);
    assert_eq!(out, [1, 2, 3, 4, 5, 6, 7, 8]);
    let remaining: Buffer = vec![vec![9, 10, 11, 12, 13, 14]];
    assert_eq!(chunks, remaining);
}
335
336
337fn prepare_fill<R>(consider_lines: usize, mut rdr: R, process_buffer: &mut Buffer, unprocessed: &mut Buffer) -> Result<(), std::io::Error> where R: Read {
340 let mut did_read = true;
341
342 let mut read_buffer = vec![];
343
344 while (process_buffer.len() < consider_lines) && did_read {
345 fill(&mut rdr, &mut read_buffer)?;
346 let mut added_length = 9;
347 did_read = false;
348 while (process_buffer.len() < consider_lines) && added_length > 0 {
349 let line = get_line(&mut read_buffer);
350 added_length = line.len();
351 if added_length > 0 {
352 process_buffer.push(line);
353 did_read = true;
354 }
355 }
356 }
357
358 let mut unproc = vec![];
359 for r in read_buffer {
360 unproc.push(r);
361 }
362
363 unprocessed.push(unproc);
364
365 Result::Ok(())
366
367}
368
369
/// With reads capped at 7 bytes, collecting two lines takes several reads and
/// leaves the bytes read past the second line as leftover.
#[test]
fn test_prepare_fill_needs_multiple_reads() {
    let source = ["01234", "56789", "abcde", "defgh"].join("\n");
    let reader = FakeCsvReader::new_by_size(source, 7);

    let mut lines: Buffer = Vec::new();
    let mut leftover: Buffer = Vec::new();
    prepare_fill(2, reader, &mut lines, &mut leftover).unwrap();

    let expected_lines: Buffer = vec![b"01234\n".to_vec(), b"56789\n".to_vec()];
    let expected_leftover: Buffer = vec![b"ab".to_vec()];
    assert_eq!(lines, expected_lines);
    assert_eq!(leftover, expected_leftover);
}
394
395
396impl<R> Blade<R> where R: Read {
397
398 fn prepare(&mut self) -> Result<usize, std::io::Error> {
399
400 let mut process_buffer = vec![];
401 let mut unprocessed = vec![];
402
403 prepare_fill(self.consider_lines, &mut self.rdr, &mut process_buffer, &mut unprocessed)?;
404
405 self.unprocessed = unprocessed;
406
407 let max = (&process_buffer).iter().fold(
408 BufferAcc { count: 0, current_line: 0, max_line: 0 },
409 |acc, line| {
410 let c = count_seperators(
411 self.field_seperator,
412 line
413 );
414 if c <= acc.count {
415 let r = BufferAcc { current_line: acc.current_line + 1, ..acc };
416 return r;
417 }
418
419 BufferAcc {
420 count: c,
421 current_line: acc.current_line + 1,
422 max_line: acc.current_line
423 }
424 }
425 );
426
427 while process_buffer.len() > max.max_line {
428 self.buffer.push(process_buffer.remove(max.max_line).to_vec());
429 }
430
431 Result::Ok(self.buffer.len())
432
433 }
434
435
436 pub fn new(reader: R, field_seperator: u8, consider_lines: usize) -> Blade<R> {
437 Blade {
438 rdr: reader,
439 field_seperator,
440 unprocessed: vec![],
441 buffer: vec![],
442 prepared: false,
443 consider_lines
444 }
445 }
446
447 fn read_rest(&mut self, return_buf: &mut [u8]) -> Result<usize, std::io::Error> {
448 let length = self.unprocessed.len();
449 if length > 0 {
450 return read_from_buffer(&mut self.unprocessed, return_buf);
451 }
452 self.rdr.read(return_buf)
453 }
454}
455
456
457impl<R> Read for Blade<R> where R: Read {
458
459 fn read(&mut self, return_buf: &mut [u8]) -> Result<usize, std::io::Error> {
460
461 if !self.prepared {
462 self.prepare()?;
463 self.prepared = true;
464 }
465
466 if self.buffer.is_empty() {
467 return self.read_rest(return_buf);
468 }
469
470 read_from_buffer(&mut self.buffer, return_buf)
471
472 }
473
474}
475
476
/// With enough lines considered, everything before the first three-field
/// line is dropped.
#[test]
fn it_skips_header() {
    let csv: Vec<String> = [
        "This is a header",
        "Full of nonsense, rubbish and problems",
        "but before the real data",
        "name,age,gender",
        "bob,22,M",
        "jane,21,F",
        "freddy,19,M",
    ]
    .iter()
    .map(|line| line.to_string())
    .collect();

    let source = FakeCsvReader::new(csv.join("\n"));
    let mut reader = BufReader::new(Blade::new(source, b',', 20));
    let mut output = String::new();

    match reader.read_to_string(&mut output) {
        Err(e) => println!("ERROR: {}", e),
        Ok(r) => println!("RESULT: {}: {}", r, output),
    }

    assert_eq!(output, csv[3..].join("\n"));
}
503
504
/// When only three lines are considered, the widest of those three (the
/// second line) becomes the start of the output.
#[test]
fn it_only_considers_upto_considers() {
    let csv: Vec<String> = [
        "This is a header",
        "Full of nonsense, rubbish and problems",
        "but before the real data",
        "name,age,gender",
        "bob,22,M",
        "jane,21,F",
        "freddy,19,M",
    ]
    .iter()
    .map(|line| line.to_string())
    .collect();

    let source = FakeCsvReader::new(csv.join("\n"));
    let mut reader = BufReader::new(Blade::new(source, b',', 3));
    let mut output = String::new();

    match reader.read_to_string(&mut output) {
        Err(e) => println!("ERROR: {}", e),
        Ok(r) => println!("RESULT: {}: {}", r, output),
    }

    assert_eq!(output, csv[1..].join("\n"));
}
530
531