use super::fs::*;
use anyhow::{anyhow, bail, Context, Result};
use csv::{Reader, StringRecord};
use std::collections::HashMap;
use std::fs::File;
use std::hash::Hash;
use std::io;
use std::io::BufWriter;
use std::io::Write;
use std::str::FromStr;
/// Handle to a CSV file on disk that is opened either for reading or for writing.
///
/// Writing goes through the [`Write`] implementation; reading re-opens the file
/// at `path` on demand.
#[derive(Debug)]
pub struct CSVFile {
/// Path the handle was created with; reused whenever the file has to be re-opened.
path: String,
/// Buffered writer over the opened file, or `None` when the handle was created
/// with `FileMode::Read`.
writer: Option<BufWriter<File>>,
}
/// Forwards byte-level writes to the inner buffered writer.
///
/// Both methods fail with `io::ErrorKind::ReadOnlyFilesystem` when the handle
/// holds no writer.
impl Write for CSVFile {
    /// Writes `buf` through the buffered writer and returns the number of bytes accepted.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        match self.writer.as_mut() {
            Some(inner) => inner.write(buf),
            None => Err(io::Error::new(
                io::ErrorKind::ReadOnlyFilesystem,
                "The file is not opened in write mode",
            )),
        }
    }

    /// Flushes the buffered writer.
    fn flush(&mut self) -> io::Result<()> {
        match self.writer.as_mut() {
            Some(inner) => inner.flush(),
            None => Err(io::Error::new(
                io::ErrorKind::ReadOnlyFilesystem,
                "The file is not opened in write mode",
            )),
        }
    }
}
impl CSVFile {
pub fn new(path: &str, mode: FileMode) -> Result<Self> {
Ok(Self {
path: path.to_string(),
writer: {
let file: File = open_file(path, mode)?;
if mode == FileMode::Read {
None
} else {
Some(BufWriter::new(file))
}
},
})
}
pub fn switch_mode(self, mode: FileMode) -> Result<Self> {
Self::new(&self.path, mode)
}
fn read(&self) -> Result<Reader<File>> {
if self.writer.is_some() {
bail!(
"Cannot read from {} since it is in write-only mode",
self.path
)
} else {
Ok(csv::ReaderBuilder::new()
.has_headers(true)
.double_quote(false)
.escape(Some(b'\\'))
.from_reader(open_file(&self.path, FileMode::Read)?))
}
}
pub fn write_header(&mut self, header: &[&str]) -> Result<()> {
match self.writer.as_mut() {
None => bail!(
"Cannot write to {} since it is in read-only mode",
self.path
),
Some(f) => {
if f.get_ref().metadata()?.len() == 0 {
writeln!(self, "{}", header.join(","))?
}
Ok(())
}
}
}
pub fn extract<T, F>(&self, extractor: F) -> Result<Vec<T>>
where
F: Fn(usize, StringRecord) -> Result<T>,
{
let mut res = Vec::<T>::new();
for (line, x) in self.read()?.records().enumerate() {
res.push(extractor(line, x?)?);
}
Ok(res)
}
pub fn column<T>(&self, i: usize) -> Result<Vec<T>>
where
T: FromStr,
{
self.extract(|line, record| {
record
.get(i)
.with_context(|| {
format!(
"Record {}: Record length is {} but the requested column is {}",
line,
record.len(),
i,
)
})
.and_then(|entry| {
entry
.parse::<T>()
.map_err(|_| anyhow!("Could not parse record {line}"))
})
})
}
pub fn indexed_lines<T>(&self, i: usize) -> Result<HashMap<T, String>>
where
T: FromStr + Eq + Hash,
{
let keys: Vec<T> = self.column(i)?;
let lines: Vec<String> = std::fs::read_to_string(&self.path)?
.lines()
.map(|s| s.to_string())
.collect();
if lines.is_empty() {
Ok(HashMap::new())
} else {
Ok(keys.into_iter().zip(lines[1..].to_vec()).collect())
}
}
}
/// Sanitises `s` so that it can be written as a single unquoted CSV field.
///
/// Double quotes are removed, commas become spaces, line breaks (`\n` and
/// `\r\n`) are replaced by a single space, and surrounding whitespace is
/// trimmed. Bare carriage returns are replaced by a space as well: a CSV
/// reader using the default record terminator treats a lone `\r` as the end of
/// a record, so leaving it in place would split the field across records.
pub fn clean_string_to_csv(s: &str) -> String {
    let single_line = s
        .replace('"', "")
        .replace(',', " ")
        .lines()
        .collect::<Vec<&str>>()
        .join(" ");
    // `lines()` only consumes `\n` and `\r\n`; handle any remaining `\r` here.
    single_line.replace('\r', " ").trim().to_string()
}
/// Unit tests for `CSVFile`; they rely on the fixture files under `tests/data/`.
#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::ensure;
    use std::net::IpAddr;

    /// Opening in each mode succeeds or fails as expected and leaves existing content untouched.
    #[test]
    fn new_test() -> Result<()> {
        let content_before = std::fs::read_to_string("tests/data/small_file.csv")?;
        CSVFile::new("tests/data/small_file.csv", FileMode::Read)?;
        CSVFile::new("tests/data/small_file.csv", FileMode::Append)?;
        let content_after = std::fs::read_to_string("tests/data/small_file.csv")?;
        assert_eq!(content_before, content_after);

        CSVFile::new("tests/data/empty.csv", FileMode::Overwrite)?;

        ensure!(CSVFile::new("tests/data/non_existent.csv", FileMode::Read).is_err());
        // Both writing modes must be able to open the missing file; clean up after each.
        for mode in [FileMode::Append, FileMode::Overwrite] {
            CSVFile::new("tests/data/non_existent.csv", mode)?;
            delete_file("tests/data/non_existent.csv", false)?;
        }
        Ok(())
    }

    /// Reading is only possible from a handle opened in read mode.
    #[test]
    fn read_test() -> Result<()> {
        let readable = CSVFile::new("tests/data/small_file.csv", FileMode::Read)?;
        readable.read()?;

        ensure!(CSVFile::new("tests/data/small_file.csv", FileMode::Append)?
            .read()
            .is_err());
        ensure!(CSVFile::new("tests/data/empty.csv", FileMode::Overwrite)?
            .read()
            .is_err());
        Ok(())
    }

    /// Every column of an empty file is empty, also when requested repeatedly.
    #[test]
    fn empty_column_test() -> Result<()> {
        let file = CSVFile::new("tests/data/empty.csv", FileMode::Read)?;
        for i in 0..10 {
            let ids = file.column::<u64>(i)?;
            ensure!(ids.is_empty());
            let ids = file.column::<u64>(i)?;
            ensure!(ids.is_empty());
        }
        Ok(())
    }

    /// Columns are parsed into the requested type; bad indices and types are rejected.
    #[test]
    fn column_test() -> Result<()> {
        let file = CSVFile::new("tests/data/small_file.csv", FileMode::Read)?;

        assert_eq!(file.column::<usize>(0)?, vec![0, 1, 2, 3]);
        assert_eq!(file.column::<String>(1)?, vec!["a", "b", "c", "d"]);
        assert_eq!(file.column::<u8>(2)?, vec![1, 0, 1, 0]);

        let ips = file.column::<IpAddr>(3);
        ensure!(ips.is_err());
        let ids = file.column::<i32>(1);
        ensure!(ids.is_err());

        let file = CSVFile::new("tests/data/invalid_csv.csv", FileMode::Read)?;
        ensure!(file.column::<i8>(0).is_err());
        Ok(())
    }

    /// Raw lines can be looked up by the parsed value of a chosen column.
    #[test]
    fn indexed_lines_test() -> Result<()> {
        let file = CSVFile::new("tests/data/small_file.csv", FileMode::Read)?;

        let by_id = file.indexed_lines::<i32>(0)?;
        assert_eq!(by_id.len(), 4);
        for (key, expected) in [(0, "0,a,1"), (1, "1,b,0"), (2, "2,c,1"), (3, "3,d,0")] {
            let line = by_id
                .get(&key)
                .with_context(|| format!("Could not find index {key}"))?;
            assert_eq!(line, expected);
        }

        let by_name = file.indexed_lines::<String>(1)?;
        assert_eq!(by_name.len(), 4);
        for (key, expected) in [("a", "0,a,1"), ("b", "1,b,0"), ("c", "2,c,1"), ("d", "3,d,0")] {
            let line = by_name
                .get(key)
                .with_context(|| format!("Could not find index '{key}'"))?;
            assert_eq!(line, expected);
        }

        ensure!(file.indexed_lines::<bool>(3).is_err());
        ensure!(file.indexed_lines::<u64>(1).is_err());

        let empty = CSVFile::new("tests/data/empty.csv", FileMode::Read)?;
        let no_lines = empty.indexed_lines::<IpAddr>(0)?;
        assert_eq!(no_lines.len(), 0);
        Ok(())
    }
}