use std;
use std::collections::HashMap;
use std::str::FromStr;
use std::ops::Index;
use std::vec::IntoIter;
use num::traits::{One, Zero};
use error::DataError;
/// A table of data stored as an ordered list of columns.
pub struct DataTable {
/// The columns of the table, in order. The row count reported by
/// `rows()` is taken from the first column only.
pub data_cols: Vec<DataColumn>,
}
impl DataTable {
    /// Creates a table that holds no columns.
    pub fn empty() -> DataTable {
        DataTable { data_cols: Vec::new() }
    }

    /// Returns the number of columns in the table.
    pub fn cols(&self) -> usize {
        self.data_cols.len()
    }

    /// Returns the number of rows, taken as the length of the first column.
    ///
    /// A table without columns has zero rows.
    pub fn rows(&self) -> usize {
        self.data_cols.first().map_or(0, |col| col.len())
    }

    /// Shrinks every column, and the column list itself, to fit its contents.
    pub fn shrink_to_fit(&mut self) {
        for col in &mut self.data_cols {
            col.shrink_to_fit();
        }
        self.data_cols.shrink_to_fit();
    }

    /// Consumes the table and parses every cell into `T`, returning one flat
    /// vector of `cols() * rows()` values.
    ///
    /// When `row_major` is `true` the output is ordered row by row (the cell
    /// at row `r`, column `c` lands at index `r * cols + c`); otherwise the
    /// columns are concatenated one after another.
    ///
    /// # Errors
    ///
    /// * `DataError::InvalidStateError` if the columns do not all have the
    ///   same length.
    /// * `DataError::DataCastError` if any cell cannot be parsed as `T`
    ///   (in column-major mode the error produced by
    ///   `DataColumn::into_vec` is forwarded unchanged).
    pub fn into_consistent_data<T: FromStr>(self, row_major: bool) -> Result<Vec<T>, DataError> {
        let cols = self.cols();
        let rows = self.rows();

        // Reject ragged tables before converting anything. The row count comes
        // from column 0 only, so without this check extra cells in longer
        // columns would be dropped silently (row-major) or a mismatched table
        // could pass by coincidence when the lengths sum to `cols * rows`
        // (column-major).
        if self.data_cols.iter().any(|col| col.len() != rows) {
            return Err(DataError::InvalidStateError);
        }

        let mut table_data = Vec::with_capacity(cols * rows);

        if row_major {
            // One lazy parsing iterator per column; they are advanced in
            // lock-step to interleave the columns row by row.
            let mut column_iters: Vec<_> = self.data_cols
                .into_iter()
                .map(|col| col.into_iter_cast::<T>())
                .collect();

            for _ in 0..rows {
                for cells in column_iters.iter_mut() {
                    match cells.next() {
                        Some(Ok(value)) => table_data.push(value),
                        Some(Err(_)) => return Err(DataError::DataCastError),
                        // Cannot happen after the length check above; kept as
                        // an explicit error rather than a silent skip.
                        None => return Err(DataError::InvalidStateError),
                    }
                }
            }
        } else {
            for col in self.data_cols {
                match col.into_vec::<T>() {
                    Ok(values) => table_data.extend(values),
                    Err(e) => return Err(e),
                }
            }
        }

        // Every column has exactly `rows` cells and each cell yields one
        // value, so the result holds exactly `cols * rows` elements here.
        Ok(table_data)
    }
}
impl Index<usize> for DataTable {
    type Output = DataColumn;

    /// Borrows the column stored at position `idx`.
    ///
    /// # Panics
    ///
    /// Panics when `idx` is not a valid column position, exactly like
    /// indexing the underlying `Vec`.
    fn index(&self, idx: usize) -> &Self::Output {
        self.data_cols.index(idx)
    }
}
/// A single column of a data table, holding its cells as strings.
pub struct DataColumn {
/// Optional name of the column.
pub name: Option<String>,
/// Mapping from each distinct cell value to an index. `empty()` starts
/// this as `None`; `update_categories()` rebuilds it from the current data.
categories: Option<HashMap<String, usize>>,
/// The raw cell values, stored as strings and parsed on demand.
data: Vec<String>,
}
impl DataColumn {
    /// Creates an unnamed column with no data and no category mapping.
    pub fn empty() -> DataColumn {
        DataColumn {
            name: None,
            categories: None,
            data: Vec::new(),
        }
    }

    /// Returns the number of cells in the column.
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Borrows the raw string cells of the column.
    pub fn data(&self) -> &Vec<String> {
        &self.data
    }

    /// Returns a copy of the category mapping, or `None` if no mapping has
    /// been stored on this column.
    pub fn categories(&self) -> Option<HashMap<String, usize>> {
        self.categories.clone()
    }

    /// Rebuilds the category mapping from the current data.
    ///
    /// Each distinct cell value is assigned an index in order of first
    /// appearance, starting at zero.
    pub fn update_categories(&mut self) {
        let mut categories: HashMap<String, usize> = HashMap::new();
        for value in &self.data {
            // Check before inserting so the key is only cloned for values
            // that have not been seen yet.
            if !categories.contains_key(value) {
                // The map size is the next unused index.
                let next_index = categories.len();
                categories.insert(value.clone(), next_index);
            }
        }
        categories.shrink_to_fit();
        self.categories = Some(categories);
    }

    /// Produces a one-hot encoding of the column based on the stored
    /// category mapping.
    ///
    /// The result holds one inner vector per category. Inner vector `i` has
    /// one entry per cell: `T::one()` where the cell's category index is `i`
    /// and `T::zero()` otherwise.
    ///
    /// # Errors
    ///
    /// Returns `DataError::InvalidStateError` if no category mapping is
    /// stored, or if a cell value is missing from the mapping (for example
    /// because data was pushed after the last `update_categories` call).
    pub fn numeric_category_data<T: Zero + One>(&self) -> Result<Vec<Vec<T>>, DataError> {
        let categories = match self.categories {
            Some(ref mapping) => mapping,
            None => return Err(DataError::InvalidStateError),
        };

        // Every inner vector ends up with exactly one entry per cell.
        let cell_count = self.data.len();
        let mut outer_vec: Vec<Vec<T>> = (0..categories.len())
            .map(|_| Vec::with_capacity(cell_count))
            .collect();

        for value in &self.data {
            let hot = match categories.get(value) {
                Some(&index) => index,
                None => return Err(DataError::InvalidStateError),
            };
            for (i, encoded) in outer_vec.iter_mut().enumerate() {
                encoded.push(if i == hot { T::one() } else { T::zero() });
            }
        }

        Ok(outer_vec)
    }

    /// Appends a cell to the end of the column.
    ///
    /// The category mapping is not updated by this call.
    pub fn push(&mut self, val: String) {
        self.data.push(val);
    }

    /// Parses the cell at position `idx` as `T`.
    ///
    /// # Errors
    ///
    /// Returns `DataError::DataCastError` if the cell cannot be parsed.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds.
    pub fn get_as<T: FromStr>(&self, idx: usize) -> Result<T, DataError> {
        self.data[idx]
            .parse::<T>()
            .map_err(|_| DataError::DataCastError)
    }

    /// Shrinks the cell storage to fit its contents.
    pub fn shrink_to_fit(&mut self) {
        self.data.shrink_to_fit();
    }

    /// Consumes the column and parses every cell as `T`.
    ///
    /// # Errors
    ///
    /// Returns `DataError::DataCastError` at the first cell that cannot be
    /// parsed.
    pub fn into_vec<T: FromStr>(self) -> Result<Vec<T>, DataError> {
        let mut casted_data = Vec::with_capacity(self.data.len());
        for raw in self.data {
            match raw.parse::<T>() {
                Ok(value) => casted_data.push(value),
                Err(_) => return Err(DataError::DataCastError),
            }
        }
        Ok(casted_data)
    }

    /// Parses every cell as `T` without consuming the column.
    ///
    /// Returns `None` as soon as one cell cannot be parsed.
    pub fn cast<T: FromStr>(&self) -> Option<Vec<T>> {
        let mut casted_data = Vec::with_capacity(self.data.len());
        for raw in &self.data {
            match raw.parse::<T>() {
                Ok(value) => casted_data.push(value),
                Err(_) => return None,
            }
        }
        Some(casted_data)
    }

    /// Consumes the column and returns an iterator that lazily parses each
    /// cell as `U`, yielding the parse result for every cell in order.
    pub fn into_iter_cast<U: FromStr>
        (self)
         -> std::iter::Map<IntoIter<String>, fn(String) -> Result<U, <U as FromStr>::Err>>
    {
        from_str_iter::<_, U>(self.data.into_iter())
    }
}
/// Wraps `iter` so that every item is parsed into `U` on demand.
///
/// Each item is viewed as a `&str` through `AsRef<str>` and handed to
/// `U::from_str`; the adaptor yields the resulting `Result` for every item,
/// in order, without stopping at failures.
fn from_str_iter<I, U>
    (iter: I)
     -> std::iter::Map<I, fn(<I as Iterator>::Item) -> Result<U, <U as FromStr>::Err>>
    where I: Iterator,
          <I as Iterator>::Item: AsRef<str>,
          U: FromStr
{
    // Plain function (not a closure) so it can be named as a `fn` pointer in
    // the return type.
    fn parse_item<S, V>(raw: S) -> Result<V, <V as FromStr>::Err>
        where S: AsRef<str>,
              V: FromStr
    {
        let text: &str = raw.as_ref();
        V::from_str(text)
    }

    let parser: fn(<I as Iterator>::Item) -> Result<U, <U as FromStr>::Err> =
        parse_item::<I::Item, U>;
    iter.map(parser)
}
impl Index<usize> for DataColumn {
    type Output = String;

    /// Borrows the raw string cell stored at position `idx`.
    ///
    /// # Panics
    ///
    /// Panics when `idx` is not a valid cell position, exactly like
    /// indexing the underlying `Vec`.
    fn index(&self, idx: usize) -> &Self::Output {
        self.data.index(idx)
    }
}