polars_rows_iter/iter_from_column/
iter_from_column_string.rs

1use super::iter_from_column_str::create_iter;
2use super::*;
3use iter_from_column_trait::IterFromColumn;
4use polars::prelude::*;
5
6impl<'a> IterFromColumn<'a> for String {
7    type RawInner = &'a str;
8    fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
9        create_iter(column)
10    }
11
12    #[inline]
13    fn get_value(polars_value: Option<&'a str>, column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
14    where
15        Self: Sized,
16    {
17        Ok(polars_value
18            .ok_or_else(|| <&'a str as IterFromColumn<'a>>::unexpected_null_value_error(column_name))?
19            .to_string())
20    }
21}
22
23impl<'a> IterFromColumn<'a> for Option<String> {
24    type RawInner = &'a str;
25    fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
26        create_iter(column)
27    }
28
29    #[inline]
30    fn get_value(polars_value: Option<&'a str>, _column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
31    where
32        Self: Sized,
33    {
34        Ok(polars_value.map(|s| s.to_string()))
35    }
36}
37
38#[cfg(test)]
39mod tests {
40    use crate::*;
41    use itertools::{izip, Itertools};
42    use polars::prelude::*;
43    use rand::{rngs::StdRng, SeedableRng};
44    use shared_test_helpers::*;
45
46    const ROW_COUNT: usize = 64;
47
48    #[test]
49    fn str_rows_iter_test() {
50        let mut rng = StdRng::seed_from_u64(0);
51        let height = ROW_COUNT;
52        let dtype = DataType::String;
53
54        let col = create_column("col", &dtype, false, height, &mut rng);
55        let col_opt = create_column("col_opt", &dtype, true, height, &mut rng);
56
57        let col_values = col.str().unwrap().iter().map(|v| v.unwrap().to_owned()).collect_vec();
58        let col_opt_values = col_opt
59            .str()
60            .unwrap()
61            .iter()
62            .map(|v| v.map(|s| s.to_owned()))
63            .collect_vec();
64
65        let df = DataFrame::new(vec![col, col_opt]).unwrap();
66
67        let col_iter = col_values.into_iter();
68        let col_opt_iter = col_opt_values.into_iter();
69
70        let expected_rows = izip!(col_iter, col_opt_iter)
71            .map(|(col, col_opt)| TestRow { col, col_opt })
72            .collect_vec();
73
74        #[derive(Debug, FromDataFrameRow, PartialEq)]
75        struct TestRow {
76            col: String,
77            col_opt: Option<String>,
78        }
79
80        let rows = df.rows_iter::<TestRow>().unwrap().map(|v| v.unwrap()).collect_vec();
81
82        assert_eq!(rows, expected_rows)
83    }
84    #[test]
85    fn str_scalar_iter_test() {
86        let mut rng = StdRng::seed_from_u64(0);
87        let height = ROW_COUNT;
88        let dtype = DataType::String;
89
90        let col = create_column("col", &dtype, false, height, &mut rng);
91        let col_opt = create_column("col_opt", &dtype, true, height, &mut rng);
92
93        let col_values = col.str().unwrap().iter().map(|v| v.unwrap().to_owned()).collect_vec();
94
95        let df = DataFrame::new(vec![col, col_opt]).unwrap();
96
97        let values = df
98            .scalar_iter("col")
99            .unwrap()
100            .collect::<PolarsResult<Vec<String>>>()
101            .unwrap();
102
103        assert_eq!(values, col_values)
104    }
105
106    #[test]
107    fn str_scalar_iter_opt_test() {
108        let mut rng = StdRng::seed_from_u64(0);
109        let height = ROW_COUNT;
110        let dtype = DataType::String;
111
112        let col = create_column("col", &dtype, false, height, &mut rng);
113        let col_opt = create_column("col_opt", &dtype, true, height, &mut rng);
114
115        let col_opt_values = col_opt
116            .str()
117            .unwrap()
118            .iter()
119            .map(|v| v.map(|s| s.to_owned()))
120            .collect_vec();
121
122        let df = DataFrame::new(vec![col, col_opt]).unwrap();
123
124        let values = df
125            .scalar_iter("col_opt")
126            .unwrap()
127            .collect::<PolarsResult<Vec<Option<String>>>>()
128            .unwrap();
129
130        assert_eq!(values, col_opt_values)
131    }
132
133    #[cfg(feature = "dtype-categorical")]
134    #[test]
135    fn cat_rows_iter_test() {
136        let mut rng = StdRng::seed_from_u64(0);
137        let height = ROW_COUNT;
138
139        let cats = Categories::new(PlSmallStr::EMPTY, PlSmallStr::EMPTY, CategoricalPhysical::U32);
140        let dtype = DataType::from_categories(cats);
141
142        let col = create_column("col", &dtype, false, height, &mut rng);
143        let col_opt = create_column("col_opt", &dtype, true, height, &mut rng);
144
145        let col_values = col
146            .cat::<Categorical32Type>()
147            .unwrap()
148            .iter_str()
149            .map(|v| v.unwrap().to_owned())
150            .collect_vec();
151        let col_opt_values = col_opt
152            .cat::<Categorical32Type>()
153            .unwrap()
154            .iter_str()
155            .map(|v| v.map(|s| s.to_owned()))
156            .collect_vec();
157
158        let df = DataFrame::new(vec![col, col_opt]).unwrap();
159
160        let col_iter = col_values.into_iter();
161        let col_opt_iter = col_opt_values.into_iter();
162
163        let expected_rows = izip!(col_iter, col_opt_iter)
164            .map(|(col, col_opt)| TestRow { col, col_opt })
165            .collect_vec();
166
167        #[derive(Debug, FromDataFrameRow, PartialEq)]
168        struct TestRow {
169            col: String,
170            col_opt: Option<String>,
171        }
172
173        let rows = df.rows_iter::<TestRow>().unwrap().map(|v| v.unwrap()).collect_vec();
174
175        assert_eq!(rows, expected_rows)
176    }
177
178    #[cfg(feature = "dtype-categorical")]
179    #[test]
180    fn cat_scalar_iter_test() {
181        use crate::DataframeRowsIterExt;
182
183        let mut rng = StdRng::seed_from_u64(0);
184        let height = ROW_COUNT;
185
186        let cats = Categories::new(PlSmallStr::EMPTY, PlSmallStr::EMPTY, CategoricalPhysical::U32);
187        let dtype = DataType::from_categories(cats);
188
189        let col = create_column("col", &dtype, false, height, &mut rng);
190        let col_opt = create_column("col_opt", &dtype, true, height, &mut rng);
191
192        let col_values = col
193            .cat32()
194            .unwrap()
195            .iter_str()
196            .map(|v| v.unwrap().to_owned())
197            .collect_vec();
198
199        let df = DataFrame::new(vec![col, col_opt]).unwrap();
200
201        let values = df
202            .scalar_iter("col")
203            .unwrap()
204            .collect::<PolarsResult<Vec<String>>>()
205            .unwrap();
206
207        assert_eq!(values, col_values)
208    }
209
210    #[cfg(feature = "dtype-categorical")]
211    #[test]
212    fn cat_rows_iter_opt_test() {
213        let mut rng = StdRng::seed_from_u64(0);
214        let height = ROW_COUNT;
215
216        let cats = Categories::new(PlSmallStr::EMPTY, PlSmallStr::EMPTY, CategoricalPhysical::U32);
217        let dtype = DataType::from_categories(cats);
218
219        let col = create_column("col", &dtype, false, height, &mut rng);
220        let col_opt = create_column("col_opt", &dtype, true, height, &mut rng);
221
222        let col_opt_values = col_opt
223            .cat32()
224            .unwrap()
225            .iter_str()
226            .map(|v| v.map(|s| s.to_owned()))
227            .collect_vec();
228
229        let df = DataFrame::new(vec![col, col_opt]).unwrap();
230
231        let values = df
232            .scalar_iter("col_opt")
233            .unwrap()
234            .collect::<PolarsResult<Vec<Option<String>>>>()
235            .unwrap();
236
237        assert_eq!(values, col_opt_values)
238    }
239
240    #[cfg(feature = "dtype-categorical")]
241    #[test]
242    fn enum_rows_iter_test() {
243        let mut rng = StdRng::seed_from_u64(0);
244        let height = ROW_COUNT;
245
246        let categories = FrozenCategories::new(["A", "B", "C", "D", "E"]).unwrap();
247        let dtype = DataType::from_frozen_categories(categories);
248
249        println!("{dtype:?}");
250
251        let col = create_column("col", &dtype, false, height, &mut rng);
252        let col_opt = create_column("col_opt", &dtype, true, height, &mut rng);
253
254        let col_values = col
255            .cat8()
256            .unwrap()
257            .iter_str()
258            .map(|v| v.unwrap().to_owned())
259            .collect_vec();
260        let col_opt_values = col_opt
261            .cat8()
262            .unwrap()
263            .iter_str()
264            .map(|v| v.map(|s| s.to_owned()))
265            .collect_vec();
266
267        let df = DataFrame::new(vec![col, col_opt]).unwrap();
268
269        let col_iter = col_values.into_iter();
270        let col_opt_iter = col_opt_values.into_iter();
271
272        let expected_rows = izip!(col_iter, col_opt_iter)
273            .map(|(col, col_opt)| TestRow { col, col_opt })
274            .collect_vec();
275
276        #[derive(Debug, FromDataFrameRow, PartialEq)]
277        struct TestRow {
278            col: String,
279            col_opt: Option<String>,
280        }
281
282        let rows = df.rows_iter::<TestRow>().unwrap().map(|v| v.unwrap()).collect_vec();
283
284        assert_eq!(rows, expected_rows)
285    }
286
287    #[cfg(feature = "dtype-categorical")]
288    #[test]
289    fn enum_scalar_iter_test() {
290        let mut rng = StdRng::seed_from_u64(0);
291        let height = ROW_COUNT;
292
293        let categories = FrozenCategories::new(["A", "B", "C", "D", "E"]).unwrap();
294        let dtype = DataType::from_frozen_categories(categories);
295
296        let col = create_column("col", &dtype, false, height, &mut rng);
297        let col_opt = create_column("col_opt", &dtype, true, height, &mut rng);
298
299        let col_values = col
300            .cat8()
301            .unwrap()
302            .iter_str()
303            .map(|v| v.unwrap().to_owned())
304            .collect_vec();
305
306        let df = DataFrame::new(vec![col, col_opt]).unwrap();
307
308        let values = df
309            .scalar_iter("col")
310            .unwrap()
311            .collect::<PolarsResult<Vec<String>>>()
312            .unwrap();
313
314        assert_eq!(values, col_values)
315    }
316
317    #[cfg(feature = "dtype-categorical")]
318    #[test]
319    fn enum_scalar_iter_opt_test() {
320        let mut rng = StdRng::seed_from_u64(0);
321        let height = ROW_COUNT;
322
323        let categories = FrozenCategories::new(["A", "B", "C", "D", "E"]).unwrap();
324        let dtype = DataType::from_frozen_categories(categories);
325
326        let col = create_column("col", &dtype, false, height, &mut rng);
327        let col_opt = create_column("col_opt", &dtype, true, height, &mut rng);
328
329        let col_opt_values = col_opt
330            .cat8()
331            .unwrap()
332            .iter_str()
333            .map(|v| v.map(|s| s.to_owned()))
334            .collect_vec();
335
336        let df = DataFrame::new(vec![col, col_opt]).unwrap();
337
338        let values = df
339            .scalar_iter("col_opt")
340            .unwrap()
341            .collect::<PolarsResult<Vec<Option<String>>>>()
342            .unwrap();
343
344        assert_eq!(values, col_opt_values)
345    }
346}