liquid_ml/dataframe/
row.rs

1//! Structs and functions for working with rows of data in a `DataFrame`.
2use crate::dataframe::{Fielder, Schema};
3use crate::error::LiquidError;
4use deepsize::DeepSizeOf;
5use serde::{Deserialize, Serialize};
6use sorer::dataframe::Data;
7use sorer::schema::DataType;
8use std::ops::Index;
9
10/// Represents a single row in a data frame.
11#[derive(Serialize, Deserialize, PartialEq, Clone, Debug, DeepSizeOf)]
12pub struct Row {
13    /// A clone of the `Schema` of the data frame this `Row` is from.
14    pub(crate) schema: Schema,
15    /// The data of this `Row` as boxed values.
16    pub(crate) data: Vec<Data>,
17    /// The offset of this `Row` in the data frame
18    idx: Option<usize>,
19}
20
/// Generates a typed setter method for `Row`.
///
/// `$func_name` is the name of the generated method, `$type` is the Rust type
/// of the value the method accepts, and `$sorer_type` is the variant name
/// shared by `DataType` and `Data` for that kind of value.
macro_rules! row_setter {
    ($func_name:ident, $type:ty, $sorer_type:ident) => {
        /// Sets the field in this `Row` at the given `col_idx` to have the
        /// given `data`.
        ///
        /// # Errors
        ///
        /// Returns `ColIndexOutOfBounds` if the schema has no column at
        /// `col_idx`, and `TypeMismatch` if the `DataType` of that column
        /// does not correspond to the type of `data`.
        ///
        /// # Panics
        ///
        /// Panics if the stored data is out of sync with the schema: either
        /// there is no value stored at `col_idx`, or the stored value is
        /// neither `Null` nor of the column's `DataType`.
        pub fn $func_name(
            &mut self,
            col_idx: usize,
            data: $type,
        ) -> Result<(), LiquidError> {
            match self.schema.schema.get(col_idx) {
                Some(DataType::$sorer_type) => {
                    // A single mutable lookup serves both the sanity check
                    // on the current value and the write of the new one.
                    let slot = self.data.get_mut(col_idx).expect(
                        "row data must hold one value per schema column",
                    );
                    match *slot {
                        Data::Null | Data::$sorer_type(_) => {
                            *slot = Data::$sorer_type(data);
                            Ok(())
                        }
                        // The schema promises this column's type, so any
                        // other stored variant is a broken invariant.
                        _ => panic!("Something is horribly wrong"),
                    }
                }
                None => Err(LiquidError::ColIndexOutOfBounds),
                _ => Err(LiquidError::TypeMismatch),
            }
        }
    };
}
48
49/// Functions for creating, mutating, and getting data from `Row`s.
50impl Row {
51    /// Constructs a new `Row` with the given `Schema` and fills it with
52    /// `Null` values.
53    pub fn new(schema: &Schema) -> Self {
54        let mut data: Vec<Data> = Vec::new();
55        for _ in &schema.schema {
56            data.push(Data::Null);
57        }
58
59        Row {
60            schema: schema.clone(),
61            data,
62            idx: None,
63        }
64    }
65
66    row_setter!(set_int, i64, Int);
67    row_setter!(set_float, f64, Float);
68    row_setter!(set_bool, bool, Bool);
69    row_setter!(set_string, String, String);
70
71    /// Sets the field in this `Row` at the given `col_idx` to be `Null`.
72    pub fn set_null(&mut self, col_idx: usize) -> Result<(), LiquidError> {
73        match self.data.get(col_idx) {
74            Some(_) => {
75                *self.data.get_mut(col_idx).unwrap() = Data::Null;
76                Ok(())
77            }
78            _ => Err(LiquidError::ColIndexOutOfBounds),
79        }
80    }
81
82    /// Set the row offset in the dataframe for this `Row`.
83    pub fn set_idx(&mut self, idx: usize) {
84        self.idx = Some(idx);
85    }
86
87    /// Get the current index of this `Row`. Is `Some` if the index has been
88    /// set, and is `None` otherwise.
89    pub fn get_idx(&self) -> Option<usize> {
90        self.idx
91    }
92
93    /// Get a reference of the boxed value at the given `idx`.
94    pub fn get(&self, idx: usize) -> Result<&Data, LiquidError> {
95        match self.data.get(idx) {
96            Some(d) => Ok(d),
97            None => Err(LiquidError::ColIndexOutOfBounds),
98        }
99    }
100
101    /// Get the number of columns in this `Row`.
102    pub fn width(&self) -> usize {
103        self.data.len()
104    }
105
106    /// Get the `DataType` of the `Column` at the given `idx`.
107    pub fn col_type(&self, idx: usize) -> Result<&DataType, LiquidError> {
108        match self.schema.schema.get(idx) {
109            Some(d) => Ok(d),
110            None => Err(LiquidError::ColIndexOutOfBounds),
111        }
112    }
113
114    /// Accept a `Fielder` visitor for this row that visits all the elements in
115    /// this `Row`. Note that this method is only useful if the data held in
116    /// this `Row` is meaningful (ie, not only `Data::Null`).
117    pub fn accept<T: Fielder>(&self, f: &mut T) -> Result<(), LiquidError> {
118        for data in &self.data {
119            match data {
120                Data::Int(d) => f.visit_int(*d),
121                Data::Bool(d) => f.visit_bool(*d),
122                Data::Float(d) => f.visit_float(*d),
123                Data::String(d) => f.visit_string(&d),
124                Data::Null => f.visit_null(),
125            }
126        }
127
128        Ok(())
129    }
130}
131
132impl Index<&str> for Row {
133    type Output = Data;
134
135    fn index(&self, name: &str) -> &Self::Output {
136        let idx = self.schema.col_idx(name).unwrap();
137        self.get(idx).unwrap()
138    }
139}
140
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dataframe::Fielder;
    use crate::dataframe::Schema;

    /// A `Fielder` that counts how many fields of each kind it has visited.
    #[derive(Default)]
    struct TestFielder {
        num_null: usize,
        num_ints: usize,
        num_bools: usize,
        num_floats: usize,
        num_strings: usize,
    }

    impl Fielder for TestFielder {
        fn visit_bool(&mut self, _b: bool) {
            self.num_bools += 1;
        }

        fn visit_float(&mut self, _f: f64) {
            self.num_floats += 1;
        }

        fn visit_int(&mut self, _i: i64) {
            self.num_ints += 1;
        }

        fn visit_string(&mut self, _s: &str) {
            self.num_strings += 1;
        }

        fn visit_null(&mut self) {
            self.num_null += 1;
        }
    }

    /// Builds an `Int, Bool, Float, String` schema and an all-`Null` row
    /// that follows it.
    fn init() -> (Vec<DataType>, Schema, Row) {
        let data_types = vec![
            DataType::Int,
            DataType::Bool,
            DataType::Float,
            DataType::String,
        ];
        let s = Schema::from(data_types.clone());
        let r = Row::new(&s);

        (data_types, s, r)
    }

    /// Fails the test unless `res` is `Err(ColIndexOutOfBounds)`.
    fn assert_out_of_bounds<T: std::fmt::Debug>(res: Result<T, LiquidError>) {
        match res {
            Err(LiquidError::ColIndexOutOfBounds) => (),
            other => panic!("expected ColIndexOutOfBounds, got {:?}", other),
        }
    }

    /// Fails the test unless `res` is `Err(TypeMismatch)`.
    fn assert_type_mismatch<T: std::fmt::Debug>(res: Result<T, LiquidError>) {
        match res {
            Err(LiquidError::TypeMismatch) => (),
            other => panic!("expected TypeMismatch, got {:?}", other),
        }
    }

    #[test]
    fn test_accept() {
        let (_data_types, _s, mut r) = init();
        r.set_int(0, 42).unwrap();
        r.set_bool(1, true).unwrap();
        r.set_float(2, 420.69).unwrap();
        r.set_string(3, String::from("Finally a sane language"))
            .unwrap();
        let mut f = TestFielder::default();
        r.set_idx(1);
        r.accept(&mut f).unwrap();
        assert_eq!(f.num_null, 0);
        assert_eq!(f.num_ints, 1);
        assert_eq!(f.num_bools, 1);
        assert_eq!(f.num_floats, 1);
        assert_eq!(f.num_strings, 1);
    }

    #[test]
    fn test_accept_counts_nulls() {
        // A freshly created row only holds `Null` values.
        let (data_types, _s, r) = init();
        let mut f = TestFielder::default();
        r.accept(&mut f).unwrap();
        assert_eq!(f.num_null, data_types.len());
        assert_eq!(f.num_ints, 0);
        assert_eq!(f.num_bools, 0);
        assert_eq!(f.num_floats, 0);
        assert_eq!(f.num_strings, 0);
    }

    #[test]
    fn test_width() {
        let mut s = Schema::new();
        let r = Row::new(&s);
        assert_eq!(r.width(), 0);
        // The row keeps its own clone of the schema, so growing `s`
        // afterwards must not change the width of `r`.
        s.add_column(DataType::Int, None).unwrap();
        assert_eq!(r.width(), 0);
        let r2 = Row::new(&s);
        assert_eq!(r2.width(), 1);
    }

    #[test]
    fn test_col_type() {
        let (data_types, _s, r) = init();
        for (idx, data_type) in data_types.iter().enumerate() {
            assert_eq!(data_type, r.col_type(idx).unwrap());
        }
        assert_out_of_bounds(r.col_type(data_types.len()));
    }

    #[test]
    fn test_get_set_idx() {
        let (_data_types, _s, mut r) = init();
        assert_eq!(r.get_idx(), None);
        r.set_idx(0);
        assert_eq!(r.get_idx(), Some(0));
    }

    #[test]
    fn test_getters_and_setters() {
        let (_data_types, _s, mut r) = init();

        for d in r.data.iter() {
            assert_eq!(&Data::Null, d);
        }

        r.set_int(0, 42).unwrap();
        assert_eq!(&Data::Int(42), r.get(0).unwrap());
        r.set_bool(1, false).unwrap();
        assert_eq!(&Data::Bool(false), r.get(1).unwrap());
        r.set_float(2, 2.5).unwrap();
        assert_eq!(&Data::Float(2.5), r.get(2).unwrap());
        r.set_string(3, String::from("foo")).unwrap();
        assert_eq!(&Data::String(String::from("foo")), r.get(3).unwrap());

        r.set_null(3).unwrap();
        assert_eq!(&Data::Null, r.get(3).unwrap());
    }

    #[test]
    fn test_setter_and_getter_errors() {
        let (data_types, _s, mut r) = init();
        let past_end = data_types.len();

        // Column 1 is a `Bool` and column 0 is an `Int`.
        assert_type_mismatch(r.set_int(1, 42));
        assert_type_mismatch(r.set_string(0, String::from("foo")));

        assert_out_of_bounds(r.set_int(past_end, 42));
        assert_out_of_bounds(r.set_null(past_end));
        assert_out_of_bounds(r.get(past_end));

        // None of the failed calls may have modified the row.
        for d in r.data.iter() {
            assert_eq!(&Data::Null, d);
        }
    }
}
263}