1use std::error::Error;
2use std::io::BufWriter;
3use std::{
4 fs::File,
5 io::{BufRead, BufReader, Write},
6 path::Path,
7};
8
9use super::constants::MB_USIZE;
10use super::row_split::CsvRowSplitter;
11
/// Estimates the average number of bytes per data row of the text file at `path`.
///
/// The first line is skipped (it is treated as a header) and at most 5001 of
/// the following lines are sampled. Each sampled line contributes its length in
/// bytes without the line terminator, plus one byte standing in for the
/// terminator.
///
/// If the file has no lines after the first one, the result is `Ok(NaN)`
/// (0 bytes divided by 0 rows); callers that need a usable number must check
/// for that.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, or if a sampled line cannot
/// be read (I/O failure or invalid UTF-8).
pub fn estimate_row_bytes(path: &Path) -> Result<f64, Box<dyn Error>> {
    // Sampling stops once the row counter exceeds this value, so up to
    // `SAMPLE_LIMIT + 1` rows are read.
    const SAMPLE_LIMIT: usize = 5000;

    let reader = BufReader::new(File::open(path)?);
    let mut rows: usize = 0;
    let mut bytes: usize = 0;

    for line in reader.lines().skip(1) {
        // Propagate read failures to the caller instead of panicking.
        bytes += line?.len() + 1;
        rows += 1;

        if rows > SAMPLE_LIMIT {
            break;
        }
    }

    Ok(bytes as f64 / rows as f64)
}
29
30pub fn column_n(path: &Path, sep: char, quote: char) -> Result<Option<usize>, Box<dyn Error>> {
31 let rdr = BufReader::new(File::open(path)?);
33 let n = rdr
34 .lines()
35 .next()
36 .map(|i| i.ok())
37 .unwrap_or_default()
38 .map(|i| CsvRowSplitter::new(&i, sep, quote).count());
39
40 Ok(n)
41}
42
43#[allow(dead_code)]
44pub fn estimate_line_count_by_mb(path: &Path, mb: Option<usize>) -> usize {
45 match estimate_row_bytes(path) {
46 Ok(v) => ((mb.unwrap_or(200) * MB_USIZE) as f64 / v) as usize,
48 Err(_) => 100_000,
49 }
50}
51
/// Writes a frequency table to `path` as comma-separated text.
///
/// If `names` is not empty, its entries are joined with `,` and written as the
/// first line. Every `(key, count)` pair in `freq` is then written as one
/// `key,count` line, in the order given. Values are written verbatim: no
/// quoting or escaping is applied.
///
/// An existing file at `path` is truncated.
///
/// # Panics
///
/// Panics if the file cannot be created, or if any write (including the final
/// flush of buffered data) fails.
pub fn write_frequency_to_csv(path: &Path, names: &Vec<String>, freq: Vec<(String, usize)>) {
    let file = File::create(path).expect("failed to create frequency output file");
    let mut wtr = BufWriter::new(file);

    if !names.is_empty() {
        writeln!(wtr, "{}", names.join(",")).expect("failed to write frequency header");
    }

    for (k, v) in freq {
        writeln!(wtr, "{k},{v}").expect("failed to write frequency row");
    }

    // Dropping a `BufWriter` ignores flush errors; flush explicitly so a failed
    // final write is reported like every other write failure here.
    wtr.flush().expect("failed to flush frequency output file");
}
65
/// Reports whether `p` has an Excel file extension.
///
/// Returns `true` when the extension is `xlsx` or `xls`, compared without
/// regard to ASCII case (so `.XLSX` and `.Xls` also match). Returns `false`
/// for any other extension and for paths without one.
pub fn is_excel(p: &Path) -> bool {
    p.extension().map_or(false, |ext| {
        ext.eq_ignore_ascii_case("xlsx") || ext.eq_ignore_ascii_case("xls")
    })
}