rusty_data/
datatable.rs

1//! The datatable module.
2//!
3//! Contains the DataTable struct and provides methods
4//! for converting the tables to various formats.
5
6use std;
7use std::collections::HashMap;
8use std::str::FromStr;
9use std::ops::Index;
10use std::vec::IntoIter;
11
12use num::traits::{One, Zero};
13
14use error::DataError;
15
16/// A data table consisting of varying column types and headers.
17pub struct DataTable {
18    /// Vector of DataColumns.
19    pub data_cols: Vec<DataColumn>,
20}
21
22impl DataTable {
23    /// Constructs an empty DataTable
24    pub fn empty() -> DataTable {
25        DataTable { data_cols: Vec::new() }
26    }
27
28    /// The number of columns in the DataTable.
29    pub fn cols(&self) -> usize {
30        self.data_cols.len()
31    }
32
33    /// The number of rows in the DataTable.
34    pub fn rows(&self) -> usize {
35        if self.data_cols.len() > 0 {
36            return self.data_cols[0].len();
37        }
38
39        0usize
40    }
41
42    /// Shrinks the table and it's underlying columns.
43    pub fn shrink_to_fit(&mut self) {
44        for col in self.data_cols.iter_mut() {
45            col.shrink_to_fit();
46        }
47
48        self.data_cols.shrink_to_fit();
49    }
50
51    /// Consumes self and attempts to convert the DataTable into a single Vec.
52    ///
53    /// Uses column major ordering.
54    ///
55    /// # Failures
56    ///
57    /// - DataCastError : Returned when the data cannot be cast into the requested type.
58    pub fn into_consistent_data<T: FromStr>(self, row_major: bool) -> Result<Vec<T>, DataError> {
59        let cols = self.cols();
60        let rows = self.rows();
61
62        let mut table_data = Vec::with_capacity(cols * rows);
63        if row_major {
64            let mut column_iters = Vec::new();
65
66            for d in self.data_cols.into_iter() {
67                column_iters.push(d.into_iter_cast::<T>());
68            }
69
70            for _ in 0..rows {
71                for i in 0..cols {
72                    match column_iters[i].next() {
73                        Some(Ok(x)) => table_data.push(x),
74                        Some(Err(_)) => return Err(DataError::DataCastError),
75                        None =>{},
76                    }
77                }
78            }
79        }
80        else {
81            for d in self.data_cols.into_iter() {
82                match d.into_vec() {
83                    Ok(x) => table_data.extend(x),
84                    Err(e) => return Err(e),
85                }
86            }
87        }
88
89        if table_data.len() != cols*rows {
90            return Err(DataError::InvalidStateError);
91        }
92        
93
94        Ok(table_data)
95    }
96}
97
98impl Index<usize> for DataTable { 
99    type Output = DataColumn;
100
101    fn index(&self, idx: usize) -> &DataColumn {
102        &self.data_cols[idx]
103    }
104}
105
/// A data column consisting of Strings.
///
/// Values are stored as raw `String`s and are only parsed into other types
/// on request. An optional category map assigns a numeric id to each
/// distinct value.
#[derive(Debug, Clone, Default)]
pub struct DataColumn {
    /// The name associated with the DataColumn.
    pub name: Option<String>,
    /// Map from a distinct value to its category id; `None` until computed.
    categories: Option<HashMap<String, usize>>,
    /// The raw values of the column, in insertion order.
    data: Vec<String>,
}
113
114impl DataColumn {
115    /// Constructs an empty data column.
116    pub fn empty() -> DataColumn {
117        DataColumn {
118            name: None,
119            categories: None,
120            data: Vec::new(),
121        }
122    }
123
124    /// Gets the length of the data column.
125    pub fn len(&self) -> usize {
126        self.data.len()
127    }
128
129    /// Gets an immutable reference to the underlying data.
130    pub fn data(&self) -> &Vec<String> {
131        &self.data
132    }
133
134    /// Gets an immutable reference to the categories Option.
135    pub fn categories(&self) -> Option<HashMap<String, usize>> {
136        match self.categories {
137            None => None,
138            Some(ref x) => Some(x.clone()),
139        }
140    }
141
142    /// Update the categories set using the current data.
143    ///
144    /// # Examples
145    ///
146    /// ```
147    /// use rusty_data::datatable::DataColumn;
148    ///
149    /// let mut dc = DataColumn::empty();
150    ///
151    /// dc.push("Class1".to_string());
152    /// dc.push("Class2".to_string());
153    /// dc.push("Class2".to_string());
154    ///
155    /// dc.update_categories();
156    /// let categories = dc.categories().unwrap();
157    ///
158    /// // Note that `contains` requires a reference so we pass an &str.
159    /// assert!(categories.contains_key("Class2"));
160    /// assert_eq!(categories.len(), 2);
161    /// ```
162    pub fn update_categories(&mut self) {
163        let mut categories = HashMap::new();
164        let mut count = 0usize;
165
166        for s in self.data.iter() {
167            if !categories.contains_key(s) {
168                categories.insert(s.clone(), count);
169                count += 1usize;
170            }
171
172        }
173        categories.shrink_to_fit();
174        self.categories = Some(categories);
175    }
176
177    /// Produce a numerical vector representation of the category data.
178    ///
179    /// # Examples
180    ///
181    /// ```
182    /// use rusty_data::datatable::DataColumn;
183    ///
184    /// let mut dc = DataColumn::empty();
185    ///
186    /// dc.push("Class1".to_string());
187    /// dc.push("Class2".to_string());
188    /// dc.push("Class2".to_string());
189    ///
190    /// dc.update_categories();
191    ///
192    /// let data = dc.numeric_category_data::<f64>().unwrap();
193    ///
194    /// println!("The data is: {:?}", data);
195    /// ```
196    pub fn numeric_category_data<T: Zero + One>(&self) -> Result<Vec<Vec<T>>, DataError> {
197        if let Some(ref categories) = self.categories {
198            let mut outer_vec = Vec::new();
199
200            for _ in 0..categories.len() {
201                outer_vec.push(Vec::<T>::new())
202            }
203
204            for d in self.data.iter() {
205                match categories.get(d) {
206                    Some(x) => {
207                        for i in 0..categories.len() {
208                            if *x == i {
209                                outer_vec[i].push(T::one());
210                            } else {
211                                outer_vec[i].push(T::zero());
212                            }
213                        }
214                    }
215                    None => {
216                        return Err(DataError::InvalidStateError);
217                    }
218                }
219            }
220            return Ok(outer_vec);
221        }
222
223        Err(DataError::InvalidStateError)
224    }
225
226    /// Pushes a new &str to the column.
227    pub fn push(&mut self, val: String) {
228        self.data.push(val);
229    }
230
231    /// Try to get the element at the index as the requested type.
232    ///
233    /// # Failures
234    ///
235    /// - DataCastError : The element at the given index could not be parsed to this type. 
236    pub fn get_as<T: FromStr>(&self, idx: usize) -> Result<T, DataError> {
237        match T::from_str(self.data[idx].as_ref()) {
238            Ok(x) => Ok(x),
239            Err(_) => Err(DataError::DataCastError),
240        }
241    }
242
243    /// Shrink the column to fit the data.
244    pub fn shrink_to_fit(&mut self) {
245        self.data.shrink_to_fit();
246    }
247
248    /// Consumes self and returns a Vec of the requested type.
249    ///
250    /// # Failures
251    ///
252    /// - DataCastError : Returned when the data cannot be parsed to the requested type.
253    pub fn into_vec<T: FromStr>(self) -> Result<Vec<T>, DataError> {
254        let mut casted_data = Vec::<T>::with_capacity(self.data.len());
255
256        for d in self.data.into_iter() {
257            match T::from_str(d.as_ref()) {
258                Ok(x) => casted_data.push(x),
259                Err(_) => return Err(DataError::DataCastError),
260            }
261        }
262
263        Ok(casted_data)
264    }
265
266    /// Cast the data to the requested type.
267    ///
268    /// Returns a Vec of the requested type wrapped in an option.
269    pub fn cast<T: FromStr>(&self) -> Option<Vec<T>> {
270        let mut casted_data = Vec::<T>::with_capacity(self.data.len());
271
272        for d in self.data.iter() {
273            match T::from_str(&d[..]) {
274                Ok(x) => casted_data.push(x),
275                Err(_) => return None,
276            }
277        }
278
279        Some(casted_data)
280    }
281
282    /// Consumes self and returns an iterator which parses
283    /// the data to the specified type returning results.
284    ///
285    /// The iterator will return a result on `next()` detailing
286    /// the outcome of the parse.
287    pub fn into_iter_cast<U: FromStr>
288        (self)
289         -> std::iter::Map<IntoIter<String>, fn(String) -> Result<U, <U as FromStr>::Err>>
290        where U: FromStr
291    {
292        from_str_iter::<_, U>(self.data.into_iter())
293    }
294}
295
/// Wraps an iterator of string-like items in a parsing adaptor.
///
/// Every item produced by `iter` is viewed as a `&str` and parsed into `U`
/// via `FromStr`; the returned iterator yields the `Result` of each parse.
/// Nothing is parsed until the returned iterator is advanced.
fn from_str_iter<I, U>
    (iter: I)
     -> std::iter::Map<I, fn(<I as Iterator>::Item) -> Result<U, <U as FromStr>::Err>>
    where I: Iterator,
          <I as Iterator>::Item: AsRef<str>,
          U: FromStr
{
    /// Parses a single string-like item into the target type.
    fn parse_item<S, P>(raw: S) -> Result<P, <P as FromStr>::Err>
        where S: AsRef<str>,
              P: FromStr
    {
        raw.as_ref().parse::<P>()
    }

    // A plain function pointer is used (rather than a closure) so that the
    // return type can be named in the signature.
    let parser: fn(<I as Iterator>::Item) -> Result<U, <U as FromStr>::Err> = parse_item;
    iter.map(parser)
}
312
313impl Index<usize> for DataColumn { 
314    type Output = String;
315    fn index(&self, idx: usize) -> &String {
316        &self.data[idx]
317    }
318}