polars_rows_iter/iter_from_column/
iter_from_column_str.rs

1use super::*;
2use iter_from_column_trait::IterFromColumn;
3use polars::prelude::*;
4
5impl<'a> IterFromColumn<'a> for &'a str {
6    type RawInner = &'a str;
7    fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
8        create_iter(column)
9    }
10
11    #[inline]
12    fn get_value(polars_value: Option<&'a str>, column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
13    where
14        Self: Sized,
15    {
16        polars_value.ok_or_else(|| <&'a str as IterFromColumn<'a>>::unexpected_null_value_error(column_name))
17    }
18}
19
20impl<'a> IterFromColumn<'a> for Option<&'a str> {
21    type RawInner = &'a str;
22    fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
23        create_iter(column)
24    }
25
26    #[inline]
27    fn get_value(polars_value: Option<&'a str>, _column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
28    where
29        Self: Sized,
30    {
31        Ok(polars_value)
32    }
33}
34
35fn create_str_iter<'a>(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
36    Ok(Box::new(column.str()?.iter()))
37}
38
/// Iterates a `Categorical` column as optional string slices, resolving each
/// physical code to its category string.
///
/// The typed accessors (`cat8`, `cat16`, `cat32`) each match a single physical
/// code width, so the accessor is chosen from the width reported by the
/// column's dtype rather than assuming 32-bit codes.
///
/// # Errors
/// Propagates the error from `DataType::cat_physical` when the column is not a
/// categorical/enum column, and from the typed accessor if it rejects the column.
#[cfg(feature = "dtype-categorical")]
fn create_cat_iter<'a>(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
    match column.dtype().cat_physical()? {
        CategoricalPhysical::U8 => Ok(Box::new(column.cat8()?.iter_str())),
        CategoricalPhysical::U16 => Ok(Box::new(column.cat16()?.iter_str())),
        CategoricalPhysical::U32 => Ok(Box::new(column.cat32()?.iter_str())),
    }
}
43
/// Iterates an `Enum` column as optional string slices, resolving each
/// physical code to its category string.
///
/// An `Enum` column is not guaranteed to be stored with 8-bit codes
/// (NOTE(review): the width presumably grows with the number of categories —
/// confirm against the Polars docs), so the typed accessor is chosen from the
/// physical width reported by the column's dtype.
///
/// # Errors
/// Propagates the error from `DataType::cat_physical` when the column is not a
/// categorical/enum column, and from the typed accessor if it rejects the column.
#[cfg(feature = "dtype-categorical")]
fn create_enum_iter<'a>(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
    match column.dtype().cat_physical()? {
        CategoricalPhysical::U8 => Ok(Box::new(column.cat8()?.iter_str())),
        CategoricalPhysical::U16 => Ok(Box::new(column.cat16()?.iter_str())),
        CategoricalPhysical::U32 => Ok(Box::new(column.cat32()?.iter_str())),
    }
}
48
49pub fn create_iter<'a>(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
50    let iter = match column.dtype() {
51        DataType::String => create_str_iter(column)?,
52        #[cfg(feature = "dtype-categorical")]
53        DataType::Categorical(_, _) => create_cat_iter(column)?,
54        #[cfg(feature = "dtype-categorical")]
55        DataType::Enum(_, _) => create_enum_iter(column)?,
56        dtype => {
57            let column_name = column.name().as_str();
58            return Err(
59                polars_err!(SchemaMismatch: "Cannot get &str from column '{column_name}' with dtype '{dtype}'.\
60                                             Make sure to enable 'dtype-categorical' feature for 'Categorical' and 'Enum' dtypes."),
61            );
62        }
63    };
64
65    Ok(iter)
66}
67
#[cfg(test)]
mod tests {
    use crate::*;
    use itertools::{izip, Itertools};
    use polars::prelude::*;
    use rand::{rngs::StdRng, SeedableRng};
    use shared_test_helpers::*;

    const ROW_COUNT: usize = 64;

    /// Builds the two test columns, `col` and `col_opt`, with `ROW_COUNT` rows
    /// each from a generator seeded with 0.
    ///
    /// `col` is created first so the generator is consumed in a fixed order.
    /// The tests unwrap every value of `col`, so it is expected to hold no nulls
    /// (presumably that is what the boolean flag of `create_column` controls).
    fn create_test_columns(dtype: &DataType) -> (Column, Column) {
        let mut rng = StdRng::seed_from_u64(0);
        let col = create_column("col", dtype, false, ROW_COUNT, &mut rng);
        let col_opt = create_column("col_opt", dtype, true, ROW_COUNT, &mut rng);
        (col, col_opt)
    }

    /// Categorical dtype with 32-bit physical codes, matching the `cat32` accessor used below.
    #[cfg(feature = "dtype-categorical")]
    fn categorical_dtype() -> DataType {
        let cats = Categories::new(PlSmallStr::EMPTY, PlSmallStr::EMPTY, CategoricalPhysical::U32);
        DataType::from_categories(cats)
    }

    /// Enum dtype over five fixed categories, read back through the `cat8` accessor below.
    #[cfg(feature = "dtype-categorical")]
    fn enum_dtype() -> DataType {
        let categories = FrozenCategories::new(["A", "B", "C", "D", "E"]).unwrap();
        DataType::from_frozen_categories(categories)
    }

    /// Copies the values of a column into owned strings; panics on a null.
    fn required_values<'a>(values: impl Iterator<Item = Option<&'a str>>) -> Vec<String> {
        values.map(|v| v.unwrap().to_owned()).collect_vec()
    }

    /// Copies the values of a column into owned strings, keeping nulls as `None`.
    fn optional_values<'a>(values: impl Iterator<Item = Option<&'a str>>) -> Vec<Option<String>> {
        values.map(|v| v.map(|s| s.to_owned())).collect_vec()
    }

    /// Asserts that `rows_iter` yields one row per expected `(col, col_opt)` pair, in order.
    fn assert_rows_iter(df: &DataFrame, col_values: &[String], col_opt_values: &[Option<String>]) {
        #[derive(Debug, FromDataFrameRow, PartialEq)]
        struct TestRow<'a> {
            col: &'a str,
            col_opt: Option<&'a str>,
        }

        let expected_rows = izip!(col_values.iter(), col_opt_values.iter())
            .map(|(col, col_opt)| TestRow {
                col: col.as_str(),
                col_opt: col_opt.as_deref(),
            })
            .collect_vec();

        let rows = df.rows_iter::<TestRow>().unwrap().map(|v| v.unwrap()).collect_vec();

        assert_eq!(rows, expected_rows)
    }

    /// Asserts that `scalar_iter` over `col` yields the expected required values.
    fn assert_scalar_iter(df: &DataFrame, col_values: &[String]) {
        let values = df
            .scalar_iter("col")
            .unwrap()
            .collect::<PolarsResult<Vec<&str>>>()
            .unwrap();

        assert_eq!(values, col_values)
    }

    /// Asserts that `scalar_iter` over `col_opt` yields the expected optional values.
    fn assert_scalar_iter_opt(df: &DataFrame, col_opt_values: &[Option<String>]) {
        let expected = col_opt_values.iter().map(|v| v.as_deref()).collect_vec();

        let values = df
            .scalar_iter("col_opt")
            .unwrap()
            .collect::<PolarsResult<Vec<Option<&str>>>>()
            .unwrap();

        assert_eq!(values, expected)
    }

    #[test]
    fn str_rows_iter_test() {
        let (col, col_opt) = create_test_columns(&DataType::String);

        let col_values = required_values(col.str().unwrap().iter());
        let col_opt_values = optional_values(col_opt.str().unwrap().iter());

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        assert_rows_iter(&df, &col_values, &col_opt_values);
    }

    #[test]
    fn str_scalar_iter_test() {
        let (col, col_opt) = create_test_columns(&DataType::String);

        let col_values = required_values(col.str().unwrap().iter());

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        assert_scalar_iter(&df, &col_values);
    }

    #[test]
    fn str_scalar_iter_opt_test() {
        let (col, col_opt) = create_test_columns(&DataType::String);

        let col_opt_values = optional_values(col_opt.str().unwrap().iter());

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        assert_scalar_iter_opt(&df, &col_opt_values);
    }

    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn cat_rows_iter_test() {
        let (col, col_opt) = create_test_columns(&categorical_dtype());

        let col_values = required_values(col.cat32().unwrap().iter_str());
        let col_opt_values = optional_values(col_opt.cat32().unwrap().iter_str());

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        assert_rows_iter(&df, &col_values, &col_opt_values);
    }

    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn cat_scalar_iter_test() {
        let (col, col_opt) = create_test_columns(&categorical_dtype());

        let col_values = required_values(col.cat32().unwrap().iter_str());

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        assert_scalar_iter(&df, &col_values);
    }

    // Named `cat_scalar_iter_opt_test` (not `cat_rows_iter_opt_test`) because it
    // exercises `scalar_iter`, like its `str_` and `enum_` counterparts.
    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn cat_scalar_iter_opt_test() {
        let (col, col_opt) = create_test_columns(&categorical_dtype());

        let col_opt_values = optional_values(col_opt.cat32().unwrap().iter_str());

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        assert_scalar_iter_opt(&df, &col_opt_values);
    }

    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn enum_rows_iter_test() {
        let (col, col_opt) = create_test_columns(&enum_dtype());

        let col_values = required_values(col.cat8().unwrap().iter_str());
        let col_opt_values = optional_values(col_opt.cat8().unwrap().iter_str());

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        assert_rows_iter(&df, &col_values, &col_opt_values);
    }

    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn enum_scalar_iter_test() {
        let (col, col_opt) = create_test_columns(&enum_dtype());

        let col_values = required_values(col.cat8().unwrap().iter_str());

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        assert_scalar_iter(&df, &col_values);
    }

    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn enum_scalar_iter_opt_test() {
        let (col, col_opt) = create_test_columns(&enum_dtype());

        let col_opt_values = optional_values(col_opt.cat8().unwrap().iter_str());

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        assert_scalar_iter_opt(&df, &col_opt_values);
    }
}