extern crate csv;
use self::csv::{ReaderBuilder, WriterBuilder};
use indexmap::{map::Keys, IndexMap};
use std::cmp::{max, min};
use std::collections::HashMap;
use std::error::Error;
use std::ops::{Index, IndexMut};
use std::{fmt, fmt::Debug};
use crate::structure::matrix::{matrix, Matrix, Shape::*};
use crate::util::useful::tab;
use json::JsonValue;
/// Column-oriented table of `f64` values.
///
/// Each column is a `Vec<f64>` stored under its header name. Columns are kept
/// in an `IndexMap`, and nothing in this type forces the columns to have equal
/// lengths.
#[derive(Debug, Clone)]
pub struct DataFrame {
/// Header name -> column values.
pub data: IndexMap<String, Vec<f64>>,
}
/// Two data frames are equal exactly when their underlying column maps
/// compare equal.
// NOTE(review): the comparison is whatever `IndexMap`'s `PartialEq` does;
// confirm against the indexmap docs whether column order is significant.
impl PartialEq for DataFrame {
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (&self.data, &other.data);
        lhs == rhs
    }
}
/// Displays the frame as the text table produced by [`DataFrame::spread`].
impl fmt::Display for DataFrame {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let rendered = self.spread();
        f.write_str(&rendered)
    }
}
/// Read access to a column by header name: `df["x"]`.
///
/// # Panics
///
/// Panics if no column is stored under `index`.
impl Index<&str> for DataFrame {
    type Output = Vec<f64>;

    fn index(&self, index: &str) -> &Self::Output {
        let column = self.data.get(index);
        column.unwrap()
    }
}
/// Mutable access to a column by header name: `df["x"].push(1.0)` or
/// `df["x"] = vec![..]`.
///
/// # Panics
///
/// Panics if no column is stored under `index`.
impl IndexMut<&str> for DataFrame {
    fn index_mut(&mut self, index: &str) -> &mut Self::Output {
        self.data
            .get_mut(index)
            .unwrap_or_else(|| panic!("There are no corresponding value"))
    }
}
/// Converts an owned `DataFrame` into a `Matrix` via [`DataFrame::to_matrix`].
///
/// Implemented as `From` rather than a hand-written `Into`: the standard
/// blanket impl still provides `Into<Matrix> for DataFrame`, so existing
/// `df.into()` call sites keep working and `Matrix::from(df)` becomes
/// available as well.
impl From<DataFrame> for Matrix {
    fn from(df: DataFrame) -> Self {
        df.to_matrix()
    }
}
/// Converts a borrowed `DataFrame` into a `Matrix` via [`DataFrame::to_matrix`].
///
/// Implemented as `From` rather than a hand-written `Into`: the standard
/// blanket impl still provides `Into<Matrix> for &DataFrame`, so existing
/// `(&df).into()` call sites keep working.
impl From<&DataFrame> for Matrix {
    fn from(df: &DataFrame) -> Self {
        df.to_matrix()
    }
}
impl DataFrame {
    /// Creates an empty `DataFrame` with no columns.
    pub fn new() -> Self {
        DataFrame {
            data: IndexMap::new(),
        }
    }

    /// Creates a `DataFrame` with one empty column per entry of `header`,
    /// in the given order.
    pub fn with_header(header: Vec<&str>) -> Self {
        let data: IndexMap<String, Vec<f64>> = header
            .into_iter()
            .map(|name| (name.to_string(), Vec::new()))
            .collect();
        DataFrame { data }
    }

    /// Returns the number of columns.
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Stores `value` as the column named `key`, replacing any column that
    /// already has that name.
    pub fn insert(&mut self, key: &str, value: Vec<f64>) {
        self.data.insert(key.to_owned(), value);
    }

    /// Appends one element to every column, taking the elements of `value`
    /// in column order.
    ///
    /// # Panics
    ///
    /// Panics if `value.len()` differs from the number of columns.
    pub fn insert_row(&mut self, value: Vec<f64>) {
        assert_eq!(self.data.len(), value.len());
        for (column, elem) in self.data.values_mut().zip(value) {
            column.push(elem);
        }
    }

    /// Returns the column named `head`, or `None` if there is no such column.
    pub fn get(&self, head: &str) -> Option<&Vec<f64>> {
        // `IndexMap` can be queried with a `&str` directly; no temporary
        // `String` is needed.
        self.data.get(head)
    }

    /// Returns an iterator over the header names, in column order.
    pub fn headers(&self) -> Keys<String, Vec<f64>> {
        self.data.keys()
    }

    /// Renames the first `header.len()` columns, in column order; remaining
    /// columns keep their names.
    ///
    /// If the resulting names contain duplicates, the later column's values
    /// replace the earlier one's (a map holds one column per name).
    ///
    /// # Panics
    ///
    /// Panics if `header` has more entries than there are columns. The frame
    /// is left untouched in that case.
    pub fn set_header(&mut self, header: Vec<&str>) {
        if header.len() > self.data.len() {
            panic!("New header is longer than original header");
        }
        // Keys must not be edited in place: the map indexes entries by the
        // hash of the key they were inserted with, so an in-place rename
        // would make the column unreachable by name. Rebuild the map instead.
        let old = std::mem::replace(&mut self.data, IndexMap::new());
        let mut new_names = header.into_iter();
        self.data = old
            .into_iter()
            .map(|(old_name, column)| {
                let name = match new_names.next() {
                    Some(new_name) => new_name.to_string(),
                    None => old_name,
                };
                (name, column)
            })
            .collect();
    }

    /// Copies all columns into a `Matrix` built with the `Col` shape, one
    /// matrix column per data-frame column.
    ///
    /// # Panics
    ///
    /// Panics if the columns do not all have the same length.
    pub fn to_matrix(&self) -> Matrix {
        let c = self.data.len();
        // The row count is defined by the first column; every column
        // (including ones following an empty first column) is checked
        // against it.
        let r = self.data.values().next().map_or(0, Vec::len);
        let mut data: Vec<f64> = Vec::with_capacity(r * c);
        for column in self.data.values() {
            assert_eq!(r, column.len());
            data.extend(column);
        }
        matrix(data, r, c, Col)
    }

    /// Builds a `DataFrame` from `mat`, naming the columns `"0"`, `"1"`, ...
    /// after their column index.
    pub fn from_matrix(mat: Matrix) -> Self {
        let mut df = DataFrame::new();
        for i in 0..mat.col {
            df.insert(&i.to_string(), mat.col(i));
        }
        df
    }

    /// Renders the frame as a text table.
    ///
    /// The first line holds the header names; each following line starts
    /// with a `r[i]` label and then one cell per column. Every cell is passed
    /// through the project's `tab` helper together with the column's width.
    /// A cell shows the value formatted with four decimals or with the
    /// default `f64` formatting, whichever is shorter. Columns shorter than
    /// the longest one get blank cells.
    ///
    /// When the longest column has more than 100 elements only the first
    /// five and last five rows are printed, separated by a `...` row.
    /// The output has no trailing newline after the last row.
    pub fn spread(&self) -> String {
        /// Length of the shorter of the two candidate renderings of `elem`.
        fn cell_width(elem: f64) -> usize {
            min(format!("{:.4}", elem).len(), elem.to_string().len())
        }

        /// Text of a cell: the 4-decimal form only if strictly shorter.
        fn cell_text(elem: f64) -> String {
            let fixed = format!("{:.4}", elem);
            let plain = elem.to_string();
            if fixed.len() < plain.len() {
                fixed
            } else {
                plain
            }
        }

        let r: usize = self
            .data
            .values()
            .fold(0, |max_len, column| max(max_len, column.len()));
        let truncated = r > 100;
        // Width reserved for the `r[i]` labels; grows with the digit count
        // of the row index in the truncated layout.
        let label_width = if truncated {
            ((r as f64).log10() as usize) + 5
        } else {
            5
        };

        // One width per column, in column order. Only the rows that will be
        // printed contribute to the width.
        let widths: Vec<usize> = self
            .data
            .iter()
            .map(|(name, column)| {
                let widest = if truncated {
                    column
                        .iter()
                        .take(5)
                        .chain(column.iter().skip(r - 5))
                        .fold(0, |w, &elem| max(w, cell_width(elem)))
                } else {
                    column.iter().fold(0, |w, &elem| max(w, cell_width(elem)))
                };
                let space = max(widest + 1, 5);
                if name.len() >= space {
                    name.len() + 1
                } else {
                    space
                }
            })
            .collect();

        // Appends the label and all cells of row `i` (without a newline).
        let push_row = |out: &mut String, i: usize| {
            out.push_str(&tab(&format!("r[{}]", i), label_width));
            for (column, &width) in self.data.values().zip(&widths) {
                match column.get(i) {
                    Some(&elem) => out.push_str(&tab(&cell_text(elem), width)),
                    None => out.push_str(&tab("", width)),
                }
            }
        };

        let mut result = String::new();

        // Header line.
        result.push_str(&tab("", label_width));
        for (name, &width) in self.data.keys().zip(&widths) {
            result.push_str(&tab(name, width));
        }
        result.push('\n');

        // Leading rows and the `...` separator of the truncated layout.
        if truncated {
            for i in 0..5 {
                push_row(&mut result, i);
                result.push('\n');
            }
            result.push_str(&tab("...", label_width));
            for &width in &widths {
                result.push_str(&tab("...", width));
            }
            result.push('\n');
        }

        // Remaining rows: everything, or just the last five when truncated.
        let first_row = if truncated { r - 5 } else { 0 };
        for i in first_row..r {
            push_row(&mut result, i);
            if i + 1 < r {
                result.push('\n');
            }
        }
        result
    }
}
/// CSV import/export for a type.
pub trait WithCSV: Sized {
/// Writes `self` to the CSV file at `file_path`.
fn write_csv(&self, file_path: &str) -> Result<(), Box<dyn Error>>;
/// Reads a value from the CSV file at `file_path`, splitting fields on `delimiter`.
fn read_csv(file_path: &str, delimiter: char) -> Result<Self, Box<dyn Error>>;
}
impl WithCSV for DataFrame {
    /// Writes the frame to `file_path` as CSV.
    ///
    /// The first record holds the header names; each following record holds
    /// one row. Columns shorter than the longest one contribute empty fields.
    ///
    /// # Errors
    ///
    /// Returns any error reported by the CSV writer (file creation, writing,
    /// flushing).
    fn write_csv(&self, file_path: &str) -> Result<(), Box<dyn Error>> {
        let mut wtr = WriterBuilder::new().from_path(file_path)?;
        let r: usize = self
            .data
            .values()
            .fold(0, |max_len, column| max(max_len, column.len()));

        wtr.write_record(self.data.keys())?;

        for i in 0..r {
            let record: Vec<String> = self
                .data
                .values()
                .map(|column| match column.get(i) {
                    Some(x) => x.to_string(),
                    None => String::new(),
                })
                .collect();
            wtr.write_record(record)?;
        }
        wtr.flush()?;
        Ok(())
    }

    /// Reads a CSV file with a header row into a `DataFrame`.
    ///
    /// Every non-empty field is parsed as `f64` and appended to the column
    /// named by its header; empty fields are skipped.
    ///
    /// # Errors
    ///
    /// Returns an error if `delimiter` does not fit in a single byte, if the
    /// CSV reader fails, or if a non-empty field is not a valid `f64`.
    fn read_csv(file_path: &str, delimiter: char) -> Result<Self, Box<dyn Error>> {
        // The reader takes a one-byte delimiter; a plain `as u8` cast would
        // silently turn a wider char into an unrelated byte.
        if (delimiter as u32) > 0xFF {
            return Err(format!("CSV delimiter {:?} does not fit in a single byte", delimiter).into());
        }
        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .delimiter(delimiter as u8)
            .from_path(file_path)?;

        let headers: Vec<&str> = rdr.headers()?.iter().collect();
        let mut result = DataFrame::with_header(headers);

        for rec in rdr.deserialize() {
            let record: HashMap<String, String> = rec?;
            for (head, value) in &record {
                if !value.is_empty() {
                    // Propagate malformed numbers instead of panicking.
                    let parsed = value.parse::<f64>()?;
                    result[head.as_str()].push(parsed);
                }
            }
        }
        Ok(result)
    }
}
/// NetCDF import/export for a type.
pub trait WithNetCDF: Sized {
/// Writes `self` to the NetCDF file at `file_path`.
fn write_nc(&self, file_path: &str) -> Result<(), Box<dyn Error>>;
/// Reads a value from the NetCDF file at `file_path`.
fn read_nc(file_path: &str) -> Result<Self, Box<dyn Error>>;
/// Reads only the variables named in `header` from the NetCDF file at `file_path`.
fn read_nc_by_header(file_path: &str, header: Vec<&str>) -> Result<Self, Box<dyn Error>>;
}
impl WithNetCDF for DataFrame {
    /// Writes every column to `file_path` as an `f64` NetCDF variable.
    ///
    /// Column `i` is stored as a variable named after its header, over its
    /// own dimension called `"{i}th col"` whose size is the column length.
    ///
    /// # Errors
    ///
    /// Returns any error reported by the `netcdf` crate.
    fn write_nc(&self, file_path: &str) -> Result<(), Box<dyn Error>> {
        let mut f = netcdf::create(file_path)?;

        for (i, (k, v)) in self.data.iter().enumerate() {
            let dim_name = format!("{}th col", i);
            f.add_dimension(&dim_name, v.len())?;
            let var = &mut f.add_variable::<f64>(k, &[&dim_name])?;
            var.put_values(v, None, None)?;
        }
        Ok(())
    }

    /// Reads every variable of the NetCDF file at `file_path` into a column
    /// named after the variable.
    ///
    /// # Errors
    ///
    /// Returns any error reported by the `netcdf` crate.
    fn read_nc(file_path: &str) -> Result<Self, Box<dyn Error>> {
        let f = netcdf::open(file_path)?;
        let mut df = DataFrame::new();

        for (k, v) in f.variables().iter() {
            let mut data: Vec<f64> = vec![0.0; v.len()];
            v.values_to(&mut data, None, None)?;
            df.insert(k, data);
        }
        Ok(df)
    }

    /// Reads only the variables named in `header`, producing columns in the
    /// order given by `header`.
    ///
    /// # Errors
    ///
    /// Returns an error if the file has no variable with one of the requested
    /// names, or if the `netcdf` crate reports a failure.
    fn read_nc_by_header(file_path: &str, header: Vec<&str>) -> Result<Self, Box<dyn Error>> {
        let f = netcdf::open(file_path)?;
        let mut df = DataFrame::new();

        for k in header {
            // A missing variable is a recoverable input problem, so report it
            // through the `Result` rather than panicking.
            let val = f
                .variables()
                .get(k)
                .ok_or_else(|| format!("There are no corresponding values: {}", k))?;
            let mut data: Vec<f64> = vec![0.0; val.len()];
            val.values_to(&mut data, None, None)?;
            df.insert(k, data);
        }
        Ok(df)
    }
}
/// Conversion to and from `json::JsonValue`.
pub trait WithJSON {
/// Converts `self` into a `JsonValue`.
fn to_json_value(&self) -> JsonValue;
/// Builds a value from `val`.
fn from_json_value(val: JsonValue) -> Self;
}
impl WithJSON for DataFrame {
    /// Converts the frame into a JSON array of row objects.
    ///
    /// Each row object maps every header name to that column's value in the
    /// row. The number of rows is the length of the longest column; columns
    /// that are shorter contribute `null` for their missing rows.
    ///
    /// # Panics
    ///
    /// Panics with "Can't insert row object" if inserting into the row
    /// object fails.
    fn to_json_value(&self) -> JsonValue {
        let r = self
            .data
            .values()
            .fold(0, |max_len, column| max(max_len, column.len()));
        let mut values = Vec::<JsonValue>::with_capacity(r);

        for i in 0..r {
            let mut row_object = JsonValue::new_object();
            for (head, column) in self.data.iter() {
                // Checked access: columns may be shorter than `r`.
                let cell = match column.get(i) {
                    Some(&x) => JsonValue::from(x),
                    None => JsonValue::Null,
                };
                row_object
                    .insert(head, cell)
                    .expect("Can't insert row object");
            }
            values.push(row_object);
        }
        JsonValue::Array(values)
    }

    /// Not implemented yet.
    ///
    /// # Panics
    ///
    /// Always panics via `unimplemented!()`.
    fn from_json_value(_val: JsonValue) -> Self {
        unimplemented!()
    }
}