polars_rows_iter/
dataframe_rows_iter_ext.rs

1use std::collections::HashMap;
2
3use polars::prelude::*;
4
5use crate::{ColumnNameBuilder, FromDataFrameRow, IterFromColumn};
6
7pub trait DataframeRowsIterExt<'a> {
8    fn rows_iter<T>(&'a self) -> PolarsResult<Box<dyn Iterator<Item = PolarsResult<T>> + 'a>>
9    where
10        T: FromDataFrameRow<'a>;
11
12    fn rows_iter_with_columns<T>(
13        &'a self,
14        build_fn: impl FnOnce(&mut T::Builder) -> &mut T::Builder,
15    ) -> PolarsResult<Box<dyn Iterator<Item = PolarsResult<T>> + 'a>>
16    where
17        T: FromDataFrameRow<'a>;
18
19    fn scalar_iter<T>(&'a self, column_name: &'a str) -> PolarsResult<impl Iterator<Item = PolarsResult<T>> + 'a>
20    where
21        T: IterFromColumn<'a> + 'a;
22}
23
24impl<'a> DataframeRowsIterExt<'a> for DataFrame {
25    /// Creates a row iterator for this DataFrame with static column names defined in row struct
26    /// ```rust
27    /// use polars::prelude::*;
28    /// use polars_rows_iter::*;
29    ///
30    ///    #[derive(Debug, FromDataFrameRow)]
31    ///    #[derive(PartialEq)] // for assert_eq
32    ///    struct MyRow<'a>
33    ///    {
34    ///        #[column("col_a")]
35    ///        a: i32,
36    ///        // the column name defaults to the field name if no explicit name given
37    ///        col_b: &'a str,
38    ///        col_c: String,
39    ///        #[column("col_d")]
40    ///        optional: Option<f64>
41    ///    }
42    ///   
43    ///    let df = df!(
44    ///            "col_a" => [1i32, 2, 3, 4, 5],
45    ///            "col_b" => ["a", "b", "c", "d", "e"],
46    ///            "col_c" => ["A", "B", "C", "D", "E"],
47    ///            "col_d" => [Some(1.0f64), None, None, Some(2.0), Some(3.0)]
48    ///        ).unwrap();
49    ///   
50    ///    let rows_iter = df.rows_iter::<MyRow>().unwrap(); // ready to use row iterator
51    ///    // collect to vector for assert_eq
52    ///    let rows_vec = rows_iter.collect::<PolarsResult<Vec<MyRow>>>().unwrap();
53    ///   
54    ///    assert_eq!(
55    ///        rows_vec,
56    ///        [
57    ///            MyRow { a: 1, col_b: "a", col_c: "A".to_string(), optional: Some(1.0) },
58    ///            MyRow { a: 2, col_b: "b", col_c: "B".to_string(), optional: None },
59    ///            MyRow { a: 3, col_b: "c", col_c: "C".to_string(), optional: None },
60    ///            MyRow { a: 4, col_b: "d", col_c: "D".to_string(), optional: Some(2.0) },
61    ///            MyRow { a: 5, col_b: "e", col_c: "E".to_string(), optional: Some(3.0) },
62    ///        ]
63    ///    );
64    /// ```
65    fn rows_iter<T>(&'a self) -> PolarsResult<Box<dyn Iterator<Item = PolarsResult<T>> + 'a>>
66    where
67        T: FromDataFrameRow<'a>,
68    {
69        T::from_dataframe(self, HashMap::new())
70    }
71
72    /// Creates a row iterator for this DataFrame with custom column names, which can be defined over the lambda function
73    /// for every struct field. If no custom column name for a field is given, the column name falls back to
74    /// the statically defined one.
75    ///```rust
76    ///use polars::prelude::*;
77    ///use polars_rows_iter::*;
78    ///
79    ///const ID: &str = "id";
80    ///
81    ///#[derive(Debug, FromDataFrameRow)]
82    ///#[derive(PartialEq)] // for assert_eq
83    ///struct MyRow<'a> {
84    ///    #[column(ID)]
85    ///    id: i32,
86    ///    value_b: &'a str,
87    ///    value_c: String,
88    ///    optional: Option<f64>,
89    ///}
90    ///
91    ///    let df = df!(
92    ///        "id" => [1i32, 2, 3, 4, 5],
93    ///        "col_b" => ["a", "b", "c", "d", "e"],
94    ///        "col_c" => ["A", "B", "C", "D", "E"],
95    ///        "col_d" => [Some(1.0f64), None, None, Some(2.0), Some(3.0)]
96    ///    ).unwrap();
97    ///
98    ///    let value_b_column_name = "col_b".to_string();
99    ///    let value_c_column_name = "col_c";
100    ///
101    ///    let rows_iter = df.rows_iter_with_columns::<MyRow>(|columns| {
102    ///        columns
103    ///            .value_b(&value_b_column_name)
104    ///            .value_c(value_c_column_name)
105    ///            .optional("col_d")
106    ///    }).unwrap();
107    ///
108    ///    // collect to vector for assert_eq
109    ///    let rows_vec = rows_iter.collect::<PolarsResult<Vec<MyRow>>>().unwrap();
110    ///   
111    ///    assert_eq!(
112    ///        rows_vec,
113    ///        [
114    ///            MyRow { id: 1, value_b: "a", value_c: "A".to_string(), optional: Some(1.0) },
115    ///            MyRow { id: 2, value_b: "b", value_c: "B".to_string(), optional: None },
116    ///            MyRow { id: 3, value_b: "c", value_c: "C".to_string(), optional: None },
117    ///            MyRow { id: 4, value_b: "d", value_c: "D".to_string(), optional: Some(2.0) },
118    ///            MyRow { id: 5, value_b: "e", value_c: "E".to_string(), optional: Some(3.0) },
119    ///        ]
120    ///    );
121    ///```
122    fn rows_iter_with_columns<T>(
123        &'a self,
124        build_fn: impl FnOnce(&mut T::Builder) -> &mut T::Builder,
125    ) -> PolarsResult<Box<dyn Iterator<Item = PolarsResult<T>> + 'a>>
126    where
127        T: FromDataFrameRow<'a>,
128    {
129        let mut builder = T::create_builder();
130
131        build_fn(&mut builder);
132
133        let columns = builder.build();
134
135        T::from_dataframe(self, columns)
136    }
137
138    /// Creates an iterator for a single column in the DataFrame
139    ///
140    /// This is a simpler alternative to `rows_iter` when you only need to iterate over one column.
141    /// The type parameter `T` specifies the Rust type to convert column values to.
142    ///
143    /// ```rust
144    /// use polars::prelude::*;
145    /// use polars_rows_iter::*;
146    ///
147    /// let df = df!(
148    ///     "col_a" => [1i32, 2, 3, 4, 5],
149    ///     "col_b" => ["a", "b", "c", "d", "e"],
150    ///     "col_c" => [Some("A"), Some("B"), None, None, Some("E")],
151    /// ).unwrap();
152    ///
153    /// // Iterate over a column with non-nullable values
154    /// let values_a = df.scalar_iter::<i32>("col_a")
155    ///     .unwrap()
156    ///     .collect::<PolarsResult<Vec<i32>>>()
157    ///     .unwrap();
158    /// assert_eq!(values_a, [1, 2, 3, 4, 5]);
159    ///
160    /// // Iterate over a column with borrowed string values
161    /// let values_b = df.scalar_iter::<&str>("col_b")
162    ///     .unwrap()
163    ///     .collect::<PolarsResult<Vec<&str>>>()
164    ///     .unwrap();
165    /// assert_eq!(values_b, ["a", "b", "c", "d", "e"]);
166    ///
167    /// // Iterate over a column with optional values
168    /// let values_c = df.scalar_iter::<Option<String>>("col_c")
169    ///     .unwrap()
170    ///     .collect::<PolarsResult<Vec<Option<String>>>>()
171    ///     .unwrap();
172    /// assert_eq!(
173    ///     values_c,
174    ///     [Some("A".to_string()), Some("B".to_string()), None, None, Some("E".to_string())]
175    /// );
176    /// ```
177    fn scalar_iter<T>(&'a self, column_name: &'a str) -> PolarsResult<impl Iterator<Item = PolarsResult<T>> + 'a>
178    where
179        T: IterFromColumn<'a> + 'a,
180    {
181        let column = self.column(column_name)?;
182        let column_dtype = column.dtype();
183
184        let iter = <T as IterFromColumn<'a>>::create_iter(column)?;
185        let iter = iter.map(|v| <T as IterFromColumn<'a>>::get_value(v, column_name, column_dtype));
186
187        Ok(iter)
188    }
189}
190
#[cfg(test)]
mod tests {
    // The fields of `TestStruct` are never read directly by these tests.
    #![allow(dead_code)]

    use polars::df;

    use crate::*;

    // Minimal row type with two integer fields and no explicit column attributes.
    #[derive(FromDataFrameRow)]
    struct TestStruct {
        x1: i32,
        x2: i32,
    }

    /// The frame offers "y1" where the struct expects "x1", so setting up the iterator must fail.
    #[test]
    fn rows_iter_should_return_error_when_given_column_not_available() {
        let frame = df!(
            "y1" => [1i32, 2, 3],
            "x2" => [1i32, 2, 3]
        )
        .unwrap();

        assert!(frame.rows_iter::<TestStruct>().is_err());
    }

    /// Each builder setter must store its column name under the matching field name.
    #[test]
    fn builder_should_build_hashmap_with_correct_entries() {
        let mut builder = TestStruct::create_builder();
        builder.x1("column_1").x2("column_2");

        let mapping = builder.build();

        // Indexing panics on a missing key, just like `get(..).unwrap()` would.
        assert_eq!("column_1", mapping["x1"]);
        assert_eq!("column_2", mapping["x2"]);
    }

    /// Redirecting `x1` to the non-existent column "y1" must produce an error.
    #[test]
    fn rows_iter_with_columns_should_return_error_when_given_column_not_available() {
        let frame = df!(
            "x1" => [1i32, 2, 3],
            "x2" => [1i32, 2, 3]
        )
        .unwrap();

        let outcome = frame.rows_iter_with_columns::<TestStruct>(|names| names.x1("y1"));

        assert!(outcome.is_err());
    }

    /// With both fields redirected to columns that exist, the iterator must be created.
    #[test]
    fn rows_iter_with_columns_should_return_valid_iter() {
        let frame = df!(
            "x_1" => [1i32, 2, 3],
            "x_2" => [1i32, 2, 3]
        )
        .unwrap();

        let outcome = frame.rows_iter_with_columns::<TestStruct>(|names| names.x1("x_1").x2("x_2"));

        assert!(outcome.is_ok());
    }
}