mifi_rs/
dataframe.rs

1use std::error::Error;
2use std::ops::Index;
3
/// Value type held by integer cells and integer columns.
type DataInteger = i32;
/// Value type held by floating-point cells and float columns.
type DataFloat = f64;
/// Value type held by text cells and text columns (an owned string).
type DataText = String;
/// Value type held by boolean cells and bool columns.
type DataBool = bool;
8
9#[derive(Debug)]
10pub struct ColumnInteger {
11    name: String,
12    data: Vec<DataInteger>,
13}
14
15#[derive(Debug)]
16pub struct ColumnFloat {
17    name: String,
18    data: Vec<DataFloat>,
19}
20
21#[derive(Debug)]
22pub struct ColumnText {
23    name: String,
24    data: Vec<DataText>,
25}
26
27#[derive(Debug)]
28pub struct ColumnBool {
29    name: String,
30    data: Vec<DataBool>,
31}
32
33#[derive(Debug)]
34pub struct DataFrame {
35    columns: Vec<DataColumn>,
36}
37
38impl Index<&str> for DataFrame {
39    type Output = DataColumn;
40
41    fn index(&self, name: &str) -> &Self::Output {
42        for col in &self.columns {
43            match col {
44                DataColumn::IntegerDataColumn(c) => {
45                    if c.name == name {
46                        return &col;
47                    }
48                }
49                DataColumn::TextDataColumn(c) => {
50                    if c.name == name {
51                        return &col;
52                    }
53                }
54                DataColumn::FloatDataColumn(c) => {
55                    if c.name == name {
56                        return &col;
57                    }
58                }
59                DataColumn::BoolDataColumn(c) => {
60                    if c.name == name {
61                        return &col;
62                    }
63                }
64            }
65        }
66        panic!("unknown column name")
67    }
68}
69
70#[derive(Debug)]
71pub enum DataColumn {
72    IntegerDataColumn(ColumnInteger),
73    TextDataColumn(ColumnText),
74    FloatDataColumn(ColumnFloat),
75    BoolDataColumn(ColumnBool),
76}
77
/// Error type for data frame failures; it carries only a message.
#[derive(Debug)]
struct DataFrameError {
    /// Human-readable description of what went wrong.
    msg: String,
}
82
83impl DataFrameError {
84    fn create(msg: &str) -> Box<dyn Error> {
85        Box::new(DataFrameError {
86            msg: msg.to_owned(),
87        })
88    }
89}
90
91impl std::fmt::Display for DataFrameError {
92    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
93        write!(f, "{}", self.msg)
94    }
95}
96
97impl Error for DataFrameError {
98    fn source(&self) -> Option<&(dyn Error + 'static)> {
99        // Generic error, underlying cause isn't tracked.
100        None
101    }
102}
103
104impl DataFrame {
105    pub fn new(
106        column_names: Vec<&str>,
107        data: Vec<Vec<DataCell>>,
108    ) -> Result<DataFrame, Box<dyn Error>> {
109        let num_cols = column_names.len();
110        let mut column_types = vec![];
111
112        // Figure out the column types from the data
113        if data.len() > 0 {
114            for i in 0..num_cols {
115                if i >= data[0].len() {
116                    // Default to integer
117                    column_types.push(DataTypes::Integer);
118                } else {
119                    column_types.push(data[0][i].data_type());
120                }
121            }
122        } else {
123            for _ in 0..num_cols {
124                column_types.push(DataTypes::Integer);
125            }
126        }
127
128        // create columns based on column types
129        let mut cols = Vec::<DataColumn>::new();
130        for (i, v) in column_types.iter().enumerate() {
131            match v {
132                DataTypes::Integer => cols.push(DataColumn::IntegerDataColumn(ColumnInteger {
133                    name: column_names[i].to_string(),
134                    data: vec![],
135                })),
136                DataTypes::Text => cols.push(DataColumn::TextDataColumn(ColumnText {
137                    name: column_names[i].to_string(),
138                    data: vec![],
139                })),
140                DataTypes::Float => cols.push(DataColumn::FloatDataColumn(ColumnFloat {
141                    name: column_names[i].to_string(),
142                    data: vec![],
143                })),
144                DataTypes::Bool => cols.push(DataColumn::BoolDataColumn(ColumnBool {
145                    name: column_names[i].to_string(),
146                    data: vec![],
147                })),
148            }
149        }
150
151        // Go through each data cell and if they can be added to the appropriate column, do it
152        for row in data.iter() {
153            if row.len() != num_cols {
154                return Err(DataFrameError::create(
155                    "length of data provided did not match expected number of columns",
156                ));
157            }
158
159            for (col_index, cell) in row.iter().enumerate() {
160                match &mut cols[col_index] {
161                    DataColumn::IntegerDataColumn(col) => match &cell {
162                        DataCell::IntegerDataCell(val) => col.data.push(val.clone()),
163                        _ => {
164                            return Err(DataFrameError::create(
165                                "data cell type did not match integer column type",
166                            ))
167                        }
168                    },
169                    DataColumn::TextDataColumn(col) => match &cell {
170                        DataCell::TextDataCell(val) => col.data.push(val.clone()),
171                        _ => {
172                            return Err(DataFrameError::create(
173                                "data cell type did not match text column type",
174                            ))
175                        }
176                    },
177                    DataColumn::FloatDataColumn(col) => match &cell {
178                        DataCell::FloatDataCell(val) => col.data.push(val.clone()),
179                        _ => {
180                            return Err(DataFrameError::create(
181                                "data cell type did not match float column type",
182                            ))
183                        }
184                    },
185                    DataColumn::BoolDataColumn(col) => match &cell {
186                        DataCell::BoolDataCell(val) => col.data.push(val.clone()),
187                        _ => {
188                            return Err(DataFrameError::create(
189                                "data cell type did not match bool column type",
190                            ))
191                        }
192                    },
193                }
194            }
195        }
196
197        Ok(DataFrame { columns: cols })
198    }
199}
200
/// The value types a column can hold.
///
/// This is a plain tag with no payload, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DataTypes {
    /// Integer values.
    Integer,
    /// Text values.
    Text,
    /// Floating-point values.
    Float,
    /// Boolean values.
    Bool,
}
208
209#[derive(Debug)]
210pub enum DataCell {
211    IntegerDataCell(DataInteger),
212    TextDataCell(DataText),
213    FloatDataCell(DataFloat),
214    BoolDataCell(DataBool),
215}
216
217impl DataCell {
218    fn data_type(&self) -> DataTypes {
219        match self {
220            DataCell::IntegerDataCell(_) => DataTypes::Integer,
221            DataCell::TextDataCell(_) => DataTypes::Text,
222            DataCell::FloatDataCell(_) => DataTypes::Float,
223            DataCell::BoolDataCell(_) => DataTypes::Bool,
224        }
225    }
226}
227
228impl From<DataInteger> for DataCell {
229    fn from(v: DataInteger) -> Self {
230        DataCell::IntegerDataCell(v)
231    }
232}
233
234impl From<DataText> for DataCell {
235    fn from(v: DataText) -> Self {
236        DataCell::TextDataCell(v)
237    }
238}
239
240impl From<DataFloat> for DataCell {
241    fn from(v: DataFloat) -> Self {
242        DataCell::FloatDataCell(v)
243    }
244}
245
246impl From<DataBool> for DataCell {
247    fn from(v: DataBool) -> Self {
248        DataCell::BoolDataCell(v)
249    }
250}
251
252impl From<&str> for DataCell {
253    fn from(v: &str) -> Self {
254        DataCell::TextDataCell(v.to_owned())
255    }
256}
257
/// Builds one input row as a `Vec<DataCell>`.
///
/// Every argument is converted with `DataCell::from`, so values of different
/// types can be mixed: `row![0.4, "book", true, 1]`. A trailing comma is
/// accepted, and `row![]` produces an empty row.
///
/// The expansion refers to `DataCell` by its bare name, so `DataCell` must be
/// in scope wherever the macro is invoked.
#[macro_export]
macro_rules! row {
    () => {
        Vec::<DataCell>::new()
    };
    ( $( $x:expr ),+ $(,)? ) => {
        vec![ $( DataCell::from($x) ),+ ]
    };
}
270
#[cfg(test)]
mod tests {
    // Bring everything from the parent module into scope, private items included.
    use super::*;

    /// Two well-formed rows shared by the happy-path tests.
    fn sample_rows() -> Vec<Vec<DataCell>> {
        vec![
            row![0.4, 0.7, "book", true, 1],
            row![3.0, 4.7, "poster", true, 1],
        ]
    }

    #[test]
    fn test_simple() {
        let dataframe = DataFrame::new(
            vec!["width", "height", "name", "in_stock", "count"],
            sample_rows(),
        );
        assert!(dataframe.is_ok());
    }

    #[test]
    fn test_simple_col() -> Result<(), Box<dyn Error>> {
        let dataframe = DataFrame::new(
            vec!["width", "height", "name", "in_stock", "count"],
            sample_rows(),
        )?;
        if let DataColumn::FloatDataColumn(widths) = &dataframe["width"] {
            assert_eq!(widths.data.len(), 2);
        } else {
            panic!("wrong type")
        }
        Ok(())
    }

    #[test]
    fn test_empty_data_defaults_to_integer_columns() -> Result<(), Box<dyn Error>> {
        let dataframe = DataFrame::new(vec!["a"], vec![])?;
        assert!(matches!(
            &dataframe["a"],
            DataColumn::IntegerDataColumn(c) if c.data.is_empty()
        ));
        Ok(())
    }

    #[test]
    fn test_row_length_mismatch_is_error() {
        let dataframe = DataFrame::new(vec!["a", "b"], vec![row![1]]);
        assert!(dataframe.is_err());
    }

    #[test]
    fn test_cell_type_mismatch_is_error() {
        // The column is typed from the first row, so the text cell is rejected.
        let dataframe = DataFrame::new(vec!["a"], vec![row![1], row!["x"]]);
        assert!(dataframe.is_err());
    }

    #[test]
    #[should_panic(expected = "unknown column name")]
    fn test_unknown_column_panics() {
        let dataframe = match DataFrame::new(vec!["a"], vec![row![1]]) {
            Ok(frame) => frame,
            // Return instead of panicking so a construction failure cannot
            // satisfy `should_panic`.
            Err(_) => return,
        };
        let _ = &dataframe["missing"];
    }
}