use csv::{ReaderBuilder, Writer};
use std::collections::HashMap;
use std::fs::File;
use std::path::Path;
use crate::error::{PandRSError, Result};
use crate::series::{CategoricalOrder, Series, StringCategorical};
use crate::DataFrame;
pub fn read_csv<P: AsRef<Path>>(path: P, has_header: bool) -> Result<DataFrame> {
let file = File::open(path.as_ref()).map_err(PandRSError::Io)?;
let mut rdr = ReaderBuilder::new()
.has_headers(has_header)
.flexible(true)
.trim(csv::Trim::All)
.from_reader(file);
let mut df = DataFrame::new();
let headers: Vec<String> = if has_header {
rdr.headers()
.map_err(PandRSError::Csv)?
.iter()
.map(|h| h.to_string())
.collect()
} else {
if let Some(first_record_result) = rdr.records().next() {
let first_record = first_record_result.map_err(PandRSError::Csv)?;
(0..first_record.len())
.map(|i| format!("column_{}", i))
.collect()
} else {
return Ok(DataFrame::new());
}
};
let mut columns: HashMap<String, Vec<String>> = HashMap::new();
for header in &headers {
columns.insert(header.clone(), Vec::new());
}
for result in rdr.records() {
let record = result.map_err(PandRSError::Csv)?;
for (i, header) in headers.iter().enumerate() {
if i < record.len() {
if let Some(col) = columns.get_mut(header) {
col.push(record[i].to_string());
}
} else {
if let Some(col) = columns.get_mut(header) {
col.push(String::new());
}
}
}
}
for header in headers {
if let Some(values) = columns.remove(&header) {
let series = Series::new(values, Some(header.clone()))?;
df.add_column(header, series)?;
}
}
Ok(df)
}
/// Writes the layout of a [`DataFrame`] to a CSV file at `path`.
///
/// The file is created (or truncated if it already exists). The first record holds the
/// column names returned by `df.column_names()`; it is followed by `df.row_count()`
/// records, each containing one field per column.
///
/// NOTE(review): cell values are not written — every data field is emitted as an empty
/// string. Emitting real values needs a per-cell accessor on `DataFrame` that is not
/// visible from this function; confirm the intended API and fill the rows from it.
///
/// # Errors
///
/// Returns `PandRSError::Io` if the file cannot be created or flushed, and
/// `PandRSError::Csv` if writing a record fails.
pub fn write_csv<P: AsRef<Path>>(df: &DataFrame, path: P) -> Result<()> {
    let file = File::create(path.as_ref()).map_err(PandRSError::Io)?;
    let mut wtr = Writer::from_writer(file);

    wtr.write_record(df.column_names())
        .map_err(PandRSError::Csv)?;

    // Every data row is the same placeholder, so build it once instead of per row.
    let blank_row: Vec<String> = df
        .column_names()
        .into_iter()
        .map(|_| String::new())
        .collect();

    // A frame with zero rows simply skips this loop and falls through to the flush.
    for _ in 0..df.row_count() {
        wtr.write_record(&blank_row).map_err(PandRSError::Csv)?;
    }

    wtr.flush().map_err(PandRSError::Io)?;
    Ok(())
}