polars_helpers/
lib.rs

1use polars::prelude::*;
2use std::str::FromStr;
3mod map;
4pub use map::*;
5mod url;
6pub use url::*;
7mod dates;
8pub use dates::*;
9mod dbg;
10pub use dbg::*;
11mod string;
12// / A trait to validate a dataframe
13pub trait Validate {
14    /// Validate the shape of the dataframe
15    fn has_shape(self, shape: (usize, usize)) -> Self;
16    /// Validate the columns of the dataframe
17    fn has_cols(self, columns: Vec<&str>) -> Self;
18    /// Validate the length of the dataframe
19    fn has_length(self, length: Comparison) -> Self;
20    /// Validate the dataframe with an expression
21    fn validate<S>(self, expr: Expr) -> Self;
22}
23
/// A length constraint: a comparison operator together with the bound to
/// compare against.
///
/// The derives make the value printable in assertion output, comparable in
/// tests, and reusable after it has been passed by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Comparison {
    /// The length must be strictly greater than the bound.
    GreaterThan(usize),
    /// The length must be strictly less than the bound.
    LessThan(usize),
    /// The length must be exactly the bound.
    Equal(usize),
}
29
30impl Validate for LazyFrame {
31    fn has_shape(self, shape: (usize, usize)) -> Self {
32        self.collect().unwrap().has_shape(shape).lazy()
33    }
34    fn has_cols(self, columns: Vec<&str>) -> Self {
35        self.collect().unwrap().has_cols(columns).lazy()
36    }
37    fn has_length(self, length: Comparison) -> Self {
38        self.collect().unwrap().has_length(length).lazy()
39    }
40    fn validate<S>(self, expr: Expr) -> Self {
41        self.collect().unwrap().validate::<S>(expr).lazy()
42    }
43}
44
45impl Validate for DataFrame {
46    fn has_shape(self, shape: (usize, usize)) -> Self {
47        assert_eq!(self.shape(), shape, "Checking Shape");
48        self
49    }
50    fn has_cols(self, columns: Vec<&str>) -> Self {
51        assert_eq!(self.get_column_names(), columns, "Checking Columns");
52        self
53    }
54    fn has_length(self, length: Comparison) -> Self {
55        match length {
56            Comparison::GreaterThan(len) => {
57                assert!(
58                    self.height() > len,
59                    "Checking length is greater than {}",
60                    len
61                );
62            }
63            Comparison::LessThan(len) => {
64                assert!(self.height() < len, "Checking length is less than {}", len);
65            }
66            Comparison::Equal(len) => {
67                assert_eq!(self.height(), len, "Checking length is equal to {}", len);
68            }
69        }
70        self
71    }
72    fn validate<S>(self, expr: Expr) -> Self {
73        assert!(
74            self.clone().lazy().select([expr]).collect().unwrap()[0]
75                .bool()
76                .expect("Validation did not return a boolean")
77                .get(0)
78                .unwrap(),
79            "Validation failed"
80        );
81        self
82    }
83}
84
/// Typed access to a single cell of a frame.
pub trait DataFrameTesting {
    /// Returns the value found at column `col`, row `row`, as a `T`.
    ///
    /// `T` must be constructible from a string via [`FromStr`], and its parse
    /// error must implement `Debug`.
    fn get_cell<T>(&self, col: &str, row: usize) -> T
    where
        T: FromStr,
        T::Err: std::fmt::Debug;
}
91
92impl DataFrameTesting for DataFrame {
93    fn get_cell<T>(&self, col: &str, row: usize) -> T
94    where
95        T: FromStr,
96        <T as std::str::FromStr>::Err: std::fmt::Debug,
97    {
98        let temp = self
99            .column(col)
100            .expect("Col does not exist")
101            .cast(&DataType::String)
102            .expect("Unable to cast value to string");
103
104        temp.get(row)
105            .expect("Row does not exist")
106            .get_str()
107            .expect("Cell is empty")
108            .parse::<T>()
109            .expect("Type did not match when parsing cell")
110    }
111}
112
#[cfg(test)]
mod tests {
    use super::*;

    /// A one-cell integer frame yields that integer through `get_cell`.
    #[test]
    fn test_get_cell() {
        let frame = df!("one" => [1]).unwrap();
        let cell: usize = frame.get_cell("one", 0);
        assert_eq!(cell, 1);
    }
}