polars_rows_iter/
dataframe_rows_iter_ext.rs

1use std::collections::HashMap;
2
3use polars::prelude::*;
4
5use crate::{ColumnNameBuilder, FromDataFrameRow};
6
/// Extension trait that adds typed, row-wise iteration to a data frame.
///
/// Both methods produce a boxed iterator whose items are `PolarsResult<T>`,
/// where `T` implements `FromDataFrameRow<'a>`. The lifetime `'a` is the
/// lifetime of the borrow of `self`; the returned iterator is bound to it.
pub trait DataframeRowsIterExt<'a> {
    /// Creates a row iterator that uses the column names statically defined on
    /// the row type `T`.
    ///
    /// # Errors
    ///
    /// Returns an error if the iterator cannot be created, e.g. when a column
    /// expected by `T` is not available in the data frame. Each yielded item is
    /// itself a `PolarsResult<T>`.
    fn rows_iter<T>(&'a self) -> PolarsResult<Box<dyn Iterator<Item = PolarsResult<T>> + 'a>>
    where
        T: FromDataFrameRow<'a>;

    /// Creates a row iterator with custom column names.
    ///
    /// `build_fn` receives a mutable reference to the column-name builder of `T`
    /// (`T::Builder`) and is called exactly once to configure it.
    ///
    /// # Errors
    ///
    /// Returns an error if the iterator cannot be created, e.g. when a column
    /// name given through `build_fn` is not available in the data frame. Each
    /// yielded item is itself a `PolarsResult<T>`.
    fn rows_iter_with_columns<T>(
        &'a self,
        build_fn: impl FnOnce(&mut T::Builder) -> &mut T::Builder,
    ) -> PolarsResult<Box<dyn Iterator<Item = PolarsResult<T>> + 'a>>
    where
        T: FromDataFrameRow<'a>;
}
19
20impl<'a> DataframeRowsIterExt<'a> for DataFrame {
21    /// Creates a row iterator for this DataFrame with static column names defined in row struct
22    /// ```rust
23    /// use polars::prelude::*;
24    /// use polars_rows_iter::*;
25    ///
26    ///    #[derive(Debug, FromDataFrameRow)]
27    ///    #[derive(PartialEq)] // for assert_eq
28    ///    struct MyRow<'a>
29    ///    {
30    ///        #[column("col_a")]
31    ///        a: i32,
32    ///        // the column name defaults to the field name if no explicit name given
33    ///        col_b: &'a str,
34    ///        col_c: String,
35    ///        #[column("col_d")]
36    ///        optional: Option<f64>
37    ///    }
38    ///   
39    ///    let df = df!(
40    ///            "col_a" => [1i32, 2, 3, 4, 5],
41    ///            "col_b" => ["a", "b", "c", "d", "e"],
42    ///            "col_c" => ["A", "B", "C", "D", "E"],
43    ///            "col_d" => [Some(1.0f64), None, None, Some(2.0), Some(3.0)]
44    ///        ).unwrap();
45    ///   
46    ///    let rows_iter = df.rows_iter::<MyRow>().unwrap(); // ready to use row iterator
47    ///    // collect to vector for assert_eq
48    ///    let rows_vec = rows_iter.collect::<PolarsResult<Vec<MyRow>>>().unwrap();
49    ///   
50    ///    assert_eq!(
51    ///        rows_vec,
52    ///        [
53    ///            MyRow { a: 1, col_b: "a", col_c: "A".to_string(), optional: Some(1.0) },
54    ///            MyRow { a: 2, col_b: "b", col_c: "B".to_string(), optional: None },
55    ///            MyRow { a: 3, col_b: "c", col_c: "C".to_string(), optional: None },
56    ///            MyRow { a: 4, col_b: "d", col_c: "D".to_string(), optional: Some(2.0) },
57    ///            MyRow { a: 5, col_b: "e", col_c: "E".to_string(), optional: Some(3.0) },
58    ///        ]
59    ///    );
60    /// ```
61    fn rows_iter<T>(&'a self) -> PolarsResult<Box<dyn Iterator<Item = PolarsResult<T>> + 'a>>
62    where
63        T: FromDataFrameRow<'a>,
64    {
65        T::from_dataframe(self, HashMap::new())
66    }
67
68    /// Creates a row iterator for this DataFrame with custom column names, which can be defined over the lambda function
69    /// for every struct field. If no custom column name for a field is given, the column name falls back to
70    /// the statically defined one.
71    ///```rust
72    ///use polars::prelude::*;
73    ///use polars_rows_iter::*;
74    ///
75    ///const ID: &str = "id";
76    ///
77    ///#[derive(Debug, FromDataFrameRow)]
78    ///#[derive(PartialEq)] // for assert_eq
79    ///struct MyRow<'a> {
80    ///    #[column(ID)]
81    ///    id: i32,
82    ///    value_b: &'a str,
83    ///    value_c: String,
84    ///    optional: Option<f64>,
85    ///}
86    ///
87    ///    let df = df!(
88    ///        "id" => [1i32, 2, 3, 4, 5],
89    ///        "col_b" => ["a", "b", "c", "d", "e"],
90    ///        "col_c" => ["A", "B", "C", "D", "E"],
91    ///        "col_d" => [Some(1.0f64), None, None, Some(2.0), Some(3.0)]
92    ///    ).unwrap();
93    ///
94    ///    let value_b_column_name = "col_b".to_string();
95    ///    let value_c_column_name = "col_c";
96    ///
97    ///    let rows_iter = df.rows_iter_with_columns::<MyRow>(|columns| {
98    ///        columns
99    ///            .value_b(&value_b_column_name)
100    ///            .value_c(value_c_column_name)
101    ///            .optional("col_d")
102    ///    }).unwrap();
103    ///
104    ///    // collect to vector for assert_eq
105    ///    let rows_vec = rows_iter.collect::<PolarsResult<Vec<MyRow>>>().unwrap();
106    ///   
107    ///    assert_eq!(
108    ///        rows_vec,
109    ///        [
110    ///            MyRow { id: 1, value_b: "a", value_c: "A".to_string(), optional: Some(1.0) },
111    ///            MyRow { id: 2, value_b: "b", value_c: "B".to_string(), optional: None },
112    ///            MyRow { id: 3, value_b: "c", value_c: "C".to_string(), optional: None },
113    ///            MyRow { id: 4, value_b: "d", value_c: "D".to_string(), optional: Some(2.0) },
114    ///            MyRow { id: 5, value_b: "e", value_c: "E".to_string(), optional: Some(3.0) },
115    ///        ]
116    ///    );
117    ///```
118    fn rows_iter_with_columns<T>(
119        &'a self,
120        build_fn: impl FnOnce(&mut T::Builder) -> &mut T::Builder,
121    ) -> PolarsResult<Box<dyn Iterator<Item = PolarsResult<T>> + 'a>>
122    where
123        T: FromDataFrameRow<'a>,
124    {
125        let mut builder = T::create_builder();
126
127        build_fn(&mut builder);
128
129        let columns = builder.build();
130
131        T::from_dataframe(self, columns)
132    }
133}
134
#[cfg(test)]
mod tests {
    // The fields of `TestStruct` are never read directly by these tests.
    #![allow(dead_code)]

    use polars::df;

    use crate::*;

    /// Minimal row type with two integer columns named after its fields.
    #[derive(FromDataFrameRow)]
    struct TestStruct {
        x1: i32,
        x2: i32,
    }

    /// A frame lacking the statically named column `x1` must be rejected.
    #[test]
    fn rows_iter_should_return_error_when_given_column_not_available() {
        let frame = df!(
            "y1" => [1i32, 2, 3],
            "x2" => [1i32, 2, 3]
        )
        .unwrap();

        assert!(frame.rows_iter::<TestStruct>().is_err());
    }

    /// The builder maps each configured field name to its custom column name.
    #[test]
    fn builder_should_build_hashmap_with_correct_entries() {
        let mut name_builder = TestStruct::create_builder();
        name_builder.x1("column_1").x2("column_2");
        let columns = name_builder.build();

        for (field, expected) in [("x1", "column_1"), ("x2", "column_2")] {
            assert_eq!(expected, *columns.get(field).unwrap());
        }
    }

    /// Pointing a field at a column that does not exist must be rejected.
    #[test]
    fn rows_iter_with_columns_should_return_error_when_given_column_not_available() {
        let frame = df!(
            "x1" => [1i32, 2, 3],
            "x2" => [1i32, 2, 3]
        )
        .unwrap();

        let outcome = frame.rows_iter_with_columns::<TestStruct>(|columns| columns.x1("y1"));

        assert!(outcome.is_err());
    }

    /// Custom names matching the frame's columns must yield an iterator.
    #[test]
    fn rows_iter_with_columns_should_return_valid_iter() {
        let frame = df!(
            "x_1" => [1i32, 2, 3],
            "x_2" => [1i32, 2, 3]
        )
        .unwrap();

        let outcome =
            frame.rows_iter_with_columns::<TestStruct>(|columns| columns.x1("x_1").x2("x_2"));

        assert!(outcome.is_ok());
    }
}
197}