polars_rows_iter/dataframe_rows_iter_ext.rs
1use std::collections::HashMap;
2
3use polars::prelude::*;
4
5use crate::{ColumnNameBuilder, FromDataFrameRow, IterFromColumn};
6
7pub trait DataframeRowsIterExt<'a> {
8 fn rows_iter<T>(&'a self) -> PolarsResult<Box<dyn Iterator<Item = PolarsResult<T>> + 'a>>
9 where
10 T: FromDataFrameRow<'a>;
11
12 fn rows_iter_with_columns<T>(
13 &'a self,
14 build_fn: impl FnOnce(&mut T::Builder) -> &mut T::Builder,
15 ) -> PolarsResult<Box<dyn Iterator<Item = PolarsResult<T>> + 'a>>
16 where
17 T: FromDataFrameRow<'a>;
18
19 fn scalar_iter<T>(&'a self, column_name: &'a str) -> PolarsResult<impl Iterator<Item = PolarsResult<T>> + 'a>
20 where
21 T: IterFromColumn<'a> + 'a;
22}
23
24impl<'a> DataframeRowsIterExt<'a> for DataFrame {
25 /// Creates a row iterator for this DataFrame with static column names defined in row struct
26 /// ```rust
27 /// use polars::prelude::*;
28 /// use polars_rows_iter::*;
29 ///
30 /// #[derive(Debug, FromDataFrameRow)]
31 /// #[derive(PartialEq)] // for assert_eq
32 /// struct MyRow<'a>
33 /// {
34 /// #[column("col_a")]
35 /// a: i32,
36 /// // the column name defaults to the field name if no explicit name given
37 /// col_b: &'a str,
38 /// col_c: String,
39 /// #[column("col_d")]
40 /// optional: Option<f64>
41 /// }
42 ///
43 /// let df = df!(
44 /// "col_a" => [1i32, 2, 3, 4, 5],
45 /// "col_b" => ["a", "b", "c", "d", "e"],
46 /// "col_c" => ["A", "B", "C", "D", "E"],
47 /// "col_d" => [Some(1.0f64), None, None, Some(2.0), Some(3.0)]
48 /// ).unwrap();
49 ///
50 /// let rows_iter = df.rows_iter::<MyRow>().unwrap(); // ready to use row iterator
51 /// // collect to vector for assert_eq
52 /// let rows_vec = rows_iter.collect::<PolarsResult<Vec<MyRow>>>().unwrap();
53 ///
54 /// assert_eq!(
55 /// rows_vec,
56 /// [
57 /// MyRow { a: 1, col_b: "a", col_c: "A".to_string(), optional: Some(1.0) },
58 /// MyRow { a: 2, col_b: "b", col_c: "B".to_string(), optional: None },
59 /// MyRow { a: 3, col_b: "c", col_c: "C".to_string(), optional: None },
60 /// MyRow { a: 4, col_b: "d", col_c: "D".to_string(), optional: Some(2.0) },
61 /// MyRow { a: 5, col_b: "e", col_c: "E".to_string(), optional: Some(3.0) },
62 /// ]
63 /// );
64 /// ```
65 fn rows_iter<T>(&'a self) -> PolarsResult<Box<dyn Iterator<Item = PolarsResult<T>> + 'a>>
66 where
67 T: FromDataFrameRow<'a>,
68 {
69 T::from_dataframe(self, HashMap::new())
70 }
71
72 /// Creates a row iterator for this DataFrame with custom column names, which can be defined over the lambda function
73 /// for every struct field. If no custom column name for a field is given, the column name falls back to
74 /// the statically defined one.
75 ///```rust
76 ///use polars::prelude::*;
77 ///use polars_rows_iter::*;
78 ///
79 ///const ID: &str = "id";
80 ///
81 ///#[derive(Debug, FromDataFrameRow)]
82 ///#[derive(PartialEq)] // for assert_eq
83 ///struct MyRow<'a> {
84 /// #[column(ID)]
85 /// id: i32,
86 /// value_b: &'a str,
87 /// value_c: String,
88 /// optional: Option<f64>,
89 ///}
90 ///
91 /// let df = df!(
92 /// "id" => [1i32, 2, 3, 4, 5],
93 /// "col_b" => ["a", "b", "c", "d", "e"],
94 /// "col_c" => ["A", "B", "C", "D", "E"],
95 /// "col_d" => [Some(1.0f64), None, None, Some(2.0), Some(3.0)]
96 /// ).unwrap();
97 ///
98 /// let value_b_column_name = "col_b".to_string();
99 /// let value_c_column_name = "col_c";
100 ///
101 /// let rows_iter = df.rows_iter_with_columns::<MyRow>(|columns| {
102 /// columns
103 /// .value_b(&value_b_column_name)
104 /// .value_c(value_c_column_name)
105 /// .optional("col_d")
106 /// }).unwrap();
107 ///
108 /// // collect to vector for assert_eq
109 /// let rows_vec = rows_iter.collect::<PolarsResult<Vec<MyRow>>>().unwrap();
110 ///
111 /// assert_eq!(
112 /// rows_vec,
113 /// [
114 /// MyRow { id: 1, value_b: "a", value_c: "A".to_string(), optional: Some(1.0) },
115 /// MyRow { id: 2, value_b: "b", value_c: "B".to_string(), optional: None },
116 /// MyRow { id: 3, value_b: "c", value_c: "C".to_string(), optional: None },
117 /// MyRow { id: 4, value_b: "d", value_c: "D".to_string(), optional: Some(2.0) },
118 /// MyRow { id: 5, value_b: "e", value_c: "E".to_string(), optional: Some(3.0) },
119 /// ]
120 /// );
121 ///```
122 fn rows_iter_with_columns<T>(
123 &'a self,
124 build_fn: impl FnOnce(&mut T::Builder) -> &mut T::Builder,
125 ) -> PolarsResult<Box<dyn Iterator<Item = PolarsResult<T>> + 'a>>
126 where
127 T: FromDataFrameRow<'a>,
128 {
129 let mut builder = T::create_builder();
130
131 build_fn(&mut builder);
132
133 let columns = builder.build();
134
135 T::from_dataframe(self, columns)
136 }
137
138 /// Creates an iterator for a single column in the DataFrame
139 ///
140 /// This is a simpler alternative to `rows_iter` when you only need to iterate over one column.
141 /// The type parameter `T` specifies the Rust type to convert column values to.
142 ///
143 /// ```rust
144 /// use polars::prelude::*;
145 /// use polars_rows_iter::*;
146 ///
147 /// let df = df!(
148 /// "col_a" => [1i32, 2, 3, 4, 5],
149 /// "col_b" => ["a", "b", "c", "d", "e"],
150 /// "col_c" => [Some("A"), Some("B"), None, None, Some("E")],
151 /// ).unwrap();
152 ///
153 /// // Iterate over a column with non-nullable values
154 /// let values_a = df.scalar_iter::<i32>("col_a")
155 /// .unwrap()
156 /// .collect::<PolarsResult<Vec<i32>>>()
157 /// .unwrap();
158 /// assert_eq!(values_a, [1, 2, 3, 4, 5]);
159 ///
160 /// // Iterate over a column with borrowed string values
161 /// let values_b = df.scalar_iter::<&str>("col_b")
162 /// .unwrap()
163 /// .collect::<PolarsResult<Vec<&str>>>()
164 /// .unwrap();
165 /// assert_eq!(values_b, ["a", "b", "c", "d", "e"]);
166 ///
167 /// // Iterate over a column with optional values
168 /// let values_c = df.scalar_iter::<Option<String>>("col_c")
169 /// .unwrap()
170 /// .collect::<PolarsResult<Vec<Option<String>>>>()
171 /// .unwrap();
172 /// assert_eq!(
173 /// values_c,
174 /// [Some("A".to_string()), Some("B".to_string()), None, None, Some("E".to_string())]
175 /// );
176 /// ```
177 fn scalar_iter<T>(&'a self, column_name: &'a str) -> PolarsResult<impl Iterator<Item = PolarsResult<T>> + 'a>
178 where
179 T: IterFromColumn<'a> + 'a,
180 {
181 let column = self.column(column_name)?;
182 let column_dtype = column.dtype();
183
184 let iter = <T as IterFromColumn<'a>>::create_iter(column)?;
185 let iter = iter.map(|v| <T as IterFromColumn<'a>>::get_value(v, column_name, column_dtype));
186
187 Ok(iter)
188 }
189}
190
#[cfg(test)]
mod tests {
    // Kept from the original module: not every item declared or derived here
    // is used by every test.
    #![allow(dead_code)]

    use polars::df;

    use crate::*;

    // Minimal row type with two `i32` fields; the static column names default
    // to the field names `x1` and `x2`.
    #[derive(FromDataFrameRow)]
    struct TestStruct {
        x1: i32,
        x2: i32,
    }

    #[test]
    fn rows_iter_should_return_error_when_given_column_not_available() {
        // The frame has `y1` instead of the `x1` column required by `TestStruct`.
        let df = df!(
            "y1" => [1i32, 2, 3],
            "x2" => [1i32, 2, 3]
        )
        .unwrap();

        let result = df.rows_iter::<TestStruct>();

        assert!(result.is_err());
    }

    #[test]
    fn rows_iter_should_yield_rows_when_columns_match() {
        // Distinct values per column so a swapped mapping would be detected.
        let df = df!(
            "x1" => [1i32, 2, 3],
            "x2" => [10i32, 20, 30]
        )
        .unwrap();

        let rows = df
            .rows_iter::<TestStruct>()
            .unwrap()
            .collect::<Result<Vec<_>, _>>()
            .unwrap();

        let values: Vec<(i32, i32)> = rows.iter().map(|row| (row.x1, row.x2)).collect();
        assert_eq!(values, [(1, 10), (2, 20), (3, 30)]);
    }

    #[test]
    fn builder_should_build_hashmap_with_correct_entries() {
        let mut builder = TestStruct::create_builder();
        builder.x1("column_1").x2("column_2");
        let columns = builder.build();

        assert_eq!("column_1", *columns.get("x1").unwrap());
        assert_eq!("column_2", *columns.get("x2").unwrap());
    }

    #[test]
    fn rows_iter_with_columns_should_return_error_when_given_column_not_available() {
        let df = df!(
            "x1" => [1i32, 2, 3],
            "x2" => [1i32, 2, 3]
        )
        .unwrap();

        // `x1` is redirected to the non-existing column `y1`.
        let result = df.rows_iter_with_columns::<TestStruct>(|b| b.x1("y1"));

        assert!(result.is_err());
    }

    #[test]
    fn rows_iter_with_columns_should_return_valid_iter() {
        // Distinct values per column so a swapped mapping would be detected.
        let df = df!(
            "x_1" => [1i32, 2, 3],
            "x_2" => [10i32, 20, 30]
        )
        .unwrap();

        let result = df.rows_iter_with_columns::<TestStruct>(|b| b.x1("x_1").x2("x_2"));

        assert!(result.is_ok());

        // Consume the iterator as well: creating it successfully does not
        // prove that the rows carry the values of the configured columns.
        let rows = result.unwrap().collect::<Result<Vec<_>, _>>().unwrap();
        let values: Vec<(i32, i32)> = rows.iter().map(|row| (row.x1, row.x2)).collect();
        assert_eq!(values, [(1, 10), (2, 20), (3, 30)]);
    }

    #[test]
    fn scalar_iter_should_return_error_when_given_column_not_available() {
        let df = df!("x1" => [1i32, 2, 3]).unwrap();

        let result = df.scalar_iter::<i32>("y1");

        assert!(result.is_err());
    }

    #[test]
    fn scalar_iter_should_yield_column_values() {
        let df = df!("x1" => [1i32, 2, 3]).unwrap();

        let values = df
            .scalar_iter::<i32>("x1")
            .unwrap()
            .collect::<Result<Vec<_>, _>>()
            .unwrap();

        assert_eq!(values, [1, 2, 3]);
    }
}