use std::ffi::OsStr;
use std::path::Path;
use crate::prelude::*;
/// Builder-style configuration for reading a delimited text file into a `DataFrame`.
///
/// Values are set through the consuming setter methods on `Reader` and are
/// applied to a `csv::ReaderBuilder` when `Reader::read` is called.
#[derive(Clone)]
pub struct Reader {
/// Path of the input file, stored as the string form of the argument given to `Reader::new`.
path: String,
/// Field delimiter byte handed to the csv reader builder.
delimiter: u8,
/// Record terminator handed to the csv reader builder.
terminator: csv::Terminator,
/// Quote byte handed to the csv reader builder.
quote: u8,
/// Whether the first record of the file is treated as the header row.
has_headers: bool,
/// Caller-supplied column names, consulted by `read` only when `has_headers` is `false`.
header: Option<Vec<String>>,
}
/// Builder-style configuration for writing a `DataFrame` to a delimited text file.
///
/// Values are set through the consuming setter methods on `Writer` and are
/// applied to a `csv::WriterBuilder` when `Writer::write` is called.
#[derive(Clone)]
pub struct Writer {
/// Path of the output file, stored as the string form of the argument given to `Writer::new`.
path: String,
/// Field delimiter byte handed to the csv writer builder.
delimiter: u8,
/// Record terminator handed to the csv writer builder.
terminator: csv::Terminator,
/// Quote byte handed to the csv writer builder.
quote: u8,
/// Whether `write` emits a header row of column names before the data rows.
has_headers: bool,
}
impl Reader {
    /// Create a reader for the file at `path` using the default settings:
    /// `,` as delimiter, CRLF as terminator, `"` as quote character, and the
    /// first record treated as the header row.
    ///
    /// A path whose lowercase form ends in `.gz` is gzip-decoded by `read`.
    pub fn new<S: AsRef<OsStr> + ToString>(path: &S) -> Self {
        Reader {
            path: path.to_string(),
            delimiter: b',',
            terminator: csv::Terminator::CRLF,
            quote: b'"',
            has_headers: true,
            header: None,
        }
    }

    /// Supply the column names to use when the file itself has no header row.
    ///
    /// These names are only consulted by `read` when `has_headers(false)` was set.
    pub fn headers(mut self, header: Vec<String>) -> Self {
        self.header = Some(header);
        self
    }

    /// Set whether the first record of the file is the header row.
    pub fn has_headers(mut self, yes: bool) -> Self {
        self.has_headers = yes;
        self
    }

    /// Set the quote byte.
    pub fn quote(mut self, quote: u8) -> Self {
        self.quote = quote;
        self
    }

    /// Set the field delimiter byte.
    pub fn delimiter(mut self, delimiter: u8) -> Self {
        self.delimiter = delimiter;
        self
    }

    /// Set a single-byte record terminator (stored as `csv::Terminator::Any`).
    pub fn terminator(mut self, terminator: u8) -> Self {
        self.terminator = csv::Terminator::Any(terminator);
        self
    }

    /// Read the configured file into a `DataFrame<i32>`.
    ///
    /// Every column is first collected as strings. It is then stored as `i32`
    /// if `astype::<i32>()` succeeds, otherwise as `f32` if `astype::<f32>()`
    /// succeeds, and otherwise left as strings.
    ///
    /// Records that the csv reader fails to parse are reported on stderr and
    /// skipped; they do not abort the read.
    ///
    /// # Errors
    ///
    /// Returns an error when the file cannot be opened, when the header row
    /// cannot be read, when `has_headers` is `false` and no names were given
    /// through `headers`, or when a column cannot be added to the frame.
    pub fn read(&self) -> Result<DataFrame<i32>, BlackJackError> {
        use flate2::read::GzDecoder;
        use std::fs::File;
        use std::io::prelude::*;

        let p = Path::new(&self.path);
        let file_reader: Box<dyn Read> = if self.path.to_lowercase().ends_with(".gz") {
            Box::new(GzDecoder::new(File::open(p)?))
        } else {
            Box::new(File::open(p)?)
        };

        let mut reader = csv::ReaderBuilder::new()
            .quote(self.quote)
            .has_headers(self.has_headers)
            .delimiter(self.delimiter)
            .terminator(self.terminator)
            .from_reader(file_reader);

        let headers: Vec<String> = if self.has_headers {
            reader
                .headers()?
                .iter()
                .map(|name| name.to_string())
                .collect()
        } else {
            match &self.header {
                Some(header) => header.clone(),
                None => {
                    return Err(BlackJackError::ValueError(
                        "Reader specifies file does not have headers,\nbut no headers were supplied with Reader::headers()"
                            .to_owned(),
                    ));
                }
            }
        };

        // One string buffer per column; fields beyond the known headers are dropped by `zip`.
        let mut vecs: Vec<Vec<String>> = vec![Vec::new(); headers.len()];
        for record in reader.records() {
            match record {
                Ok(rec) => {
                    for (field, container) in rec.iter().zip(&mut vecs) {
                        container.push(field.into());
                    }
                }
                // Best-effort: a bad record is reported and skipped. The diagnostic
                // goes to stderr so it does not pollute the program's stdout.
                Err(err) => eprintln!("Unable to read record: '{}'", err),
            }
        }

        let mut df = DataFrame::new();
        for (header, vec) in headers.into_iter().zip(vecs) {
            let mut series = Series::from_vec(vec);
            series.set_name(&header);
            // Prefer the narrowest numeric type that parses; fall back to strings.
            if let Ok(ser) = series.astype::<i32>() {
                df.add_column(ser)?;
            } else if let Ok(ser) = series.astype::<f32>() {
                df.add_column(ser)?;
            } else {
                df.add_column(series)?;
            }
        }
        Ok(df)
    }
}
impl Writer {
    /// Create a writer for the file at `path` using the default settings:
    /// `,` as delimiter, CRLF as terminator, `"` as quote character, and a
    /// header row written before the data.
    ///
    /// A path whose lowercase form ends in `.gz` is gzip-compressed by `write`.
    pub fn new<S: AsRef<OsStr> + ToString>(path: &S) -> Self {
        Writer {
            path: path.to_string(),
            delimiter: b',',
            terminator: csv::Terminator::CRLF,
            quote: b'"',
            has_headers: true,
        }
    }

    /// Set whether a header row of column names is written before the data rows.
    pub fn has_headers(mut self, yes: bool) -> Self {
        self.has_headers = yes;
        self
    }

    /// Set the quote byte.
    pub fn quote(mut self, quote: u8) -> Self {
        self.quote = quote;
        self
    }

    /// Set the field delimiter byte.
    pub fn delimiter(mut self, delimiter: u8) -> Self {
        self.delimiter = delimiter;
        self
    }

    /// Set a single-byte record terminator (stored as `csv::Terminator::Any`).
    pub fn terminator(mut self, terminator: u8) -> Self {
        self.terminator = csv::Terminator::Any(terminator);
        self
    }

    /// Write `df` to the configured file, creating or truncating it.
    ///
    /// Columns are written in the order yielded by `df.columns()`. A frame
    /// with no columns produces no data rows.
    ///
    /// # Errors
    ///
    /// Returns an error when the file cannot be created or when writing or
    /// flushing a record fails.
    ///
    /// # Panics
    ///
    /// Panics if a column named by `df.columns()` cannot be retrieved from the
    /// frame, or if the columns do not all have the same number of values.
    pub fn write<I: PartialEq + PartialOrd + BlackJackData>(
        &self,
        df: DataFrame<I>,
    ) -> Result<(), BlackJackError> {
        // `flate2::write::GzEncoder` compresses the bytes written to it. The
        // `read` flavour only compresses data *read* from it and forwards
        // writes to the inner file unchanged.
        use flate2::write::GzEncoder;
        use flate2::Compression;
        use std::fs::File;
        use std::io::prelude::*;

        let p = Path::new(&self.path);
        let file_writer: Box<dyn Write> = if self.path.to_lowercase().ends_with(".gz") {
            Box::new(GzEncoder::new(File::create(p)?, Compression::default()))
        } else {
            Box::new(File::create(p)?)
        };

        let mut writer = csv::WriterBuilder::new()
            .delimiter(self.delimiter)
            .has_headers(self.has_headers)
            .quote(self.quote)
            .terminator(self.terminator)
            .from_writer(file_writer);

        let header: Vec<String> = df.columns().map(|v| v.to_string()).collect();

        // Fetch each column by the name that goes into the header row, so header
        // cells and data cells stay aligned whatever order the frame's internal
        // storage iterates in.
        let data: Vec<_> = header
            .iter()
            .map(|name| {
                df.get_column_infer(name.as_str())
                    .expect("column reported by DataFrame::columns() is missing from the frame")
                    .into_string_vec()
            })
            .collect();

        if self.has_headers {
            writer.write_record(header.as_slice())?;
        }

        // A frame without columns has no rows; avoid indexing into an empty `data`.
        let row_count = data.first().map_or(0, |column| column.len());
        for row_idx in 0..row_count {
            writer.write_record(data.iter().map(|column| &column[row_idx]))?;
        }

        // Flush explicitly so write errors are returned rather than swallowed on drop.
        // For `.gz` output the gzip trailer is emitted when the encoder is dropped.
        writer.flush()?;
        Ok(())
    }
}