polars_rows_iter/iter_from_column/
iter_from_column_string.rs

1use super::iter_from_column_str::create_iter;
2use super::*;
3use iter_from_column_trait::IterFromColumn;
4use polars::prelude::*;
5
6impl<'a> IterFromColumn<'a> for String {
7    type RawInner = &'a str;
8    fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
9        create_iter(column)
10    }
11
12    #[inline]
13    fn get_value(polars_value: Option<&'a str>, column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
14    where
15        Self: Sized,
16    {
17        Ok(polars_value
18            .ok_or_else(|| <&'a str as IterFromColumn<'a>>::unexpected_null_value_error(column_name))?
19            .to_string())
20    }
21}
22
23impl<'a> IterFromColumn<'a> for Option<String> {
24    type RawInner = &'a str;
25    fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
26        create_iter(column)
27    }
28
29    #[inline]
30    fn get_value(polars_value: Option<&'a str>, _column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
31    where
32        Self: Sized,
33    {
34        Ok(polars_value.map(|s| s.to_string()))
35    }
36}
37
#[cfg(test)]
mod tests {
    use crate::*;
    use itertools::{izip, Itertools};
    use polars::prelude::*;
    use rand::{rngs::StdRng, SeedableRng};
    use shared_test_helpers::*;

    /// Number of rows generated for every test column.
    const ROW_COUNT: usize = 64;

    /// Row shape shared by all tests: one mandatory and one optional string field.
    #[derive(Debug, FromDataFrameRow, PartialEq)]
    struct TestRow {
        col: String,
        col_opt: Option<String>,
    }

    /// Builds a data frame from `col` and `col_opt`, iterates it as [`TestRow`]s and
    /// asserts that the rows equal the ones assembled from the expected values.
    ///
    /// `col_values` and `col_opt_values` are the values read directly from the
    /// columns by the calling test, in row order.
    fn assert_rows_match(
        col: Column,
        col_opt: Column,
        col_values: Vec<String>,
        col_opt_values: Vec<Option<String>>,
    ) {
        let expected_rows = izip!(col_values, col_opt_values)
            .map(|(col, col_opt)| TestRow { col, col_opt })
            .collect_vec();

        let df = DataFrame::new(vec![col, col_opt]).unwrap();
        let rows = df.rows_iter::<TestRow>().unwrap().map(|v| v.unwrap()).collect_vec();

        assert_eq!(rows, expected_rows);
    }

    /// Plain `DataType::String` columns are read back as `String` / `Option<String>`.
    #[test]
    fn str_test() {
        // Fixed seed keeps the generated columns reproducible.
        let mut rng = StdRng::seed_from_u64(0);
        let height = ROW_COUNT;
        let dtype = DataType::String;

        let col = create_column("col", dtype.clone(), false, height, &mut rng);
        let col_opt = create_column("col_opt", dtype, true, height, &mut rng);

        // `col` is unwrapped value by value, so it is expected to contain no nulls.
        let col_values = col.str().unwrap().iter().map(|v| v.unwrap().to_owned()).collect_vec();
        let col_opt_values = col_opt
            .str()
            .unwrap()
            .iter()
            .map(|v| v.map(|s| s.to_owned()))
            .collect_vec();

        assert_rows_match(col, col_opt, col_values, col_opt_values);
    }

    /// Categorical columns are read back as their string representation.
    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn cat_test() {
        // Fixed seed keeps the generated columns reproducible.
        let mut rng = StdRng::seed_from_u64(0);
        let height = ROW_COUNT;

        let cats = Categories::new(PlSmallStr::EMPTY, PlSmallStr::EMPTY, CategoricalPhysical::U32);
        let dtype = DataType::from_categories(cats);

        let col = create_column("col", dtype.clone(), false, height, &mut rng);
        let col_opt = create_column("col_opt", dtype, true, height, &mut rng);

        // `col` is unwrapped value by value, so it is expected to contain no nulls.
        let col_values = col
            .cat::<Categorical32Type>()
            .unwrap()
            .iter_str()
            .map(|v| v.unwrap().to_owned())
            .collect_vec();
        let col_opt_values = col_opt
            .cat::<Categorical32Type>()
            .unwrap()
            .iter_str()
            .map(|v| v.map(|s| s.to_owned()))
            .collect_vec();

        assert_rows_match(col, col_opt, col_values, col_opt_values);
    }

    /// Columns built from a fixed (frozen) category set are read back as strings.
    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn enum_test() {
        // Fixed seed keeps the generated columns reproducible.
        let mut rng = StdRng::seed_from_u64(0);
        let height = ROW_COUNT;

        let categories = FrozenCategories::new(["A", "B", "C", "D", "E"]).unwrap();
        let dtype = DataType::from_frozen_categories(categories);

        let col = create_column("col", dtype.clone(), false, height, &mut rng);
        let col_opt = create_column("col_opt", dtype, true, height, &mut rng);

        // NOTE(review): `cat8` presumably matches the physical type chosen for five
        // categories; confirm against the polars version in use.
        let col_values = col
            .cat8()
            .unwrap()
            .iter_str()
            .map(|v| v.unwrap().to_owned())
            .collect_vec();
        let col_opt_values = col_opt
            .cat8()
            .unwrap()
            .iter_str()
            .map(|v| v.map(|s| s.to_owned()))
            .collect_vec();

        assert_rows_match(col, col_opt, col_values, col_opt_values);
    }
}