polars_rows_iter/iter_from_column/
iter_from_column_str.rs

1use super::*;
2use iter_from_column_trait::IterFromColumn;
3use polars::prelude::*;
4
5impl<'a> IterFromColumn<'a> for &'a str {
6    type RawInner = &'a str;
7    fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
8        create_iter(column)
9    }
10
11    #[inline]
12    fn get_value(polars_value: Option<&'a str>, column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
13    where
14        Self: Sized,
15    {
16        polars_value.ok_or_else(|| <&'a str as IterFromColumn<'a>>::unexpected_null_value_error(column_name))
17    }
18}
19
20impl<'a> IterFromColumn<'a> for Option<&'a str> {
21    type RawInner = &'a str;
22    fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
23        create_iter(column)
24    }
25
26    #[inline]
27    fn get_value(polars_value: Option<&'a str>, _column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
28    where
29        Self: Sized,
30    {
31        Ok(polars_value)
32    }
33}
34
35fn create_str_iter<'a>(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
36    Ok(Box::new(column.str()?.iter()))
37}
38
/// Boxes the string iterator (`iter_str`) obtained from the column's categorical view.
///
/// Propagates the error from `Column::categorical` if the column cannot be viewed as
/// categorical. Only compiled with the `dtype-categorical` feature.
#[cfg(feature = "dtype-categorical")]
fn create_cat_iter<'a>(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
    let categories = column.categorical()?;
    Ok(Box::new(categories.iter_str()))
}
43
44pub fn create_iter<'a>(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
45    let iter = match column.dtype() {
46        DataType::String => create_str_iter(column)?,
47        #[cfg(feature = "dtype-categorical")]
48        DataType::Categorical(_, _) | DataType::Enum(_, _) => create_cat_iter(column)?,
49        dtype => {
50            let column_name = column.name().as_str();
51            return Err(
52                polars_err!(SchemaMismatch: "Cannot get &str from column '{column_name}' with dtype '{dtype}'.\
53                                             Make sure to enable 'dtype-categorical' feature for 'Categorical' and 'Enum' dtypes."),
54            );
55        }
56    };
57
58    Ok(iter)
59}
60
#[cfg(test)]
mod tests {
    use crate::*;
    use itertools::{izip, Itertools};
    use polars::prelude::*;
    use rand::{rngs::StdRng, SeedableRng};
    use shared_test_helpers::*;

    /// Number of rows generated for every test column.
    const ROW_COUNT: usize = 64;

    /// Reads `df` back through `rows_iter` and asserts that the rows equal the ones built
    /// from the expected values.
    ///
    /// `df` must contain the columns `col` (required) and `col_opt` (optional);
    /// `col_values` and `col_opt_values` hold the expected content of those columns, in
    /// row order.
    fn assert_rows_match(df: &DataFrame, col_values: &[String], col_opt_values: &[Option<String>]) {
        #[derive(Debug, FromDataFrameRow, PartialEq)]
        struct TestRow<'a> {
            col: &'a str,
            col_opt: Option<&'a str>,
        }

        let expected_rows = izip!(col_values, col_opt_values)
            .map(|(col, col_opt)| TestRow {
                col: col.as_str(),
                col_opt: col_opt.as_deref(),
            })
            .collect_vec();

        let rows = df.rows_iter::<TestRow>().unwrap().map(|v| v.unwrap()).collect_vec();

        assert_eq!(rows, expected_rows)
    }

    /// Shared body of the categorical and enum tests: generates a required and an optional
    /// column of `dtype`, captures their string values via `iter_str`, and checks that
    /// row iteration yields the same values.
    #[cfg(feature = "dtype-categorical")]
    fn assert_categorical_round_trip(dtype: DataType) {
        // Fixed seed keeps the generated columns reproducible between runs.
        let mut rng = StdRng::seed_from_u64(0);

        let col = create_column("col", dtype.clone(), false, ROW_COUNT, &mut rng);
        let col_opt = create_column("col_opt", dtype, true, ROW_COUNT, &mut rng);

        // Values are copied out before the columns are moved into the frame.
        let col_values = col
            .categorical()
            .unwrap()
            .iter_str()
            .map(|v| v.unwrap().to_owned())
            .collect_vec();
        let col_opt_values = col_opt
            .categorical()
            .unwrap()
            .iter_str()
            .map(|v| v.map(|s| s.to_owned()))
            .collect_vec();

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        assert_rows_match(&df, &col_values, &col_opt_values);
    }

    #[test]
    fn str_test() {
        // Fixed seed keeps the generated columns reproducible between runs.
        let mut rng = StdRng::seed_from_u64(0);
        let dtype = DataType::String;

        let col = create_column("col", dtype.clone(), false, ROW_COUNT, &mut rng);
        let col_opt = create_column("col_opt", dtype, true, ROW_COUNT, &mut rng);

        // Values are copied out before the columns are moved into the frame.
        let col_values = col.str().unwrap().iter().map(|v| v.unwrap().to_owned()).collect_vec();
        let col_opt_values = col_opt
            .str()
            .unwrap()
            .iter()
            .map(|v| v.map(|s| s.to_owned()))
            .collect_vec();

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        assert_rows_match(&df, &col_values, &col_opt_values);
    }

    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn cat_test() {
        assert_categorical_round_trip(DataType::Categorical(None, CategoricalOrdering::Physical));
    }

    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn enum_test() {
        let enum_value_series = Series::new("enum".into(), &["A", "B", "C", "D", "E"]);
        let categories = enum_value_series.str().unwrap().downcast_iter().next().unwrap().clone();

        assert_categorical_round_trip(create_enum_dtype(categories));
    }
}