rsv_lib/csv/
clean.rs

1use crate::args::Clean;
2use crate::utils;
3use crate::utils::cli_result::CliResult;
4use crate::utils::progress::Progress;
5use regex::bytes::Regex;
6use std::fs::File;
7use std::io::{BufRead, BufReader, BufWriter, Write};
8use std::path::Path;
9
10impl Clean {
11    pub fn csv_run(&self) -> CliResult {
12        let path = &self.path();
13
14        // new file
15        let new_path = match self.output.is_empty() {
16            true => utils::filename::new_path(path, "-cleaned"),
17            false => Path::new(&self.output).into(),
18        };
19
20        // open files
21        let mut rdr = BufReader::new(File::open(path)?);
22        let mut wtr = BufWriter::new(File::create(&new_path)?);
23
24        // progress
25        let mut prog = Progress::new();
26
27        // copy
28        let re = Regex::new(&self.escape)?;
29        let empty_bytes = b"";
30
31        let mut buf = vec![];
32        let mut i = 0;
33        while let Ok(bytes_read) = rdr.read_until(b'\n', &mut buf) {
34            if bytes_read == 0 {
35                break;
36            }
37
38            let str = re.replace_all(&buf[..bytes_read], empty_bytes);
39            wtr.write_all(&str)?;
40            buf.clear();
41
42            // progress print
43            prog.add_bytes(bytes_read);
44            if i % 50_000 == 0 {
45                prog.add_chunks(1);
46                prog.print();
47            }
48
49            i += 1;
50        }
51
52        prog.print();
53
54        println!("\nSaved to file: {}", new_path.display());
55
56        Ok(())
57    }
58}