polars_rows_iter/iter_from_column/
iter_from_column_str.rs1use super::*;
2use iter_from_column_trait::IterFromColumn;
3use polars::prelude::*;
4
5impl<'a> IterFromColumn<'a> for &'a str {
6 type RawInner = &'a str;
7 fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
8 create_iter(column)
9 }
10
11 #[inline]
12 fn get_value(polars_value: Option<&'a str>, column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
13 where
14 Self: Sized,
15 {
16 polars_value.ok_or_else(|| <&'a str as IterFromColumn<'a>>::unexpected_null_value_error(column_name))
17 }
18}
19
20impl<'a> IterFromColumn<'a> for Option<&'a str> {
21 type RawInner = &'a str;
22 fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
23 create_iter(column)
24 }
25
26 #[inline]
27 fn get_value(polars_value: Option<&'a str>, _column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
28 where
29 Self: Sized,
30 {
31 Ok(polars_value)
32 }
33}
34
35fn create_str_iter<'a>(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
36 Ok(Box::new(column.str()?.iter()))
37}
38
/// Creates a boxed iterator over the string representation of a categorical column's values.
///
/// Only compiled when the `dtype-categorical` feature is enabled.
///
/// # Errors
///
/// Propagates the error returned by `Column::categorical`.
#[cfg(feature = "dtype-categorical")]
fn create_cat_iter<'a>(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
    let categories = column.categorical()?;
    let values = categories.iter_str();

    Ok(Box::new(values))
}
43
44pub fn create_iter<'a>(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
45 let iter = match column.dtype() {
46 DataType::String => create_str_iter(column)?,
47 #[cfg(feature = "dtype-categorical")]
48 DataType::Categorical(_, _) | DataType::Enum(_, _) => create_cat_iter(column)?,
49 dtype => {
50 let column_name = column.name().as_str();
51 return Err(
52 polars_err!(SchemaMismatch: "Cannot get &str from column '{column_name}' with dtype '{dtype}'.\
53 Make sure to enable 'dtype-categorical' feature for 'Categorical' and 'Enum' dtypes."),
54 );
55 }
56 };
57
58 Ok(iter)
59}
60
61#[cfg(test)]
62mod tests {
63 use crate::*;
64 use itertools::{izip, Itertools};
65 use polars::prelude::*;
66 use rand::{rngs::StdRng, SeedableRng};
67 use shared_test_helpers::*;
68
69 const ROW_COUNT: usize = 64;
70
// Round-trips a required and a nullable `String` column through `rows_iter` and checks
// that every row matches the values read directly from the columns.
#[test]
fn str_test() {
    // Row shape that borrows its string cells.
    #[derive(Debug, FromDataFrameRow, PartialEq)]
    struct TestRow<'a> {
        col: &'a str,
        col_opt: Option<&'a str>,
    }

    let mut rng = StdRng::seed_from_u64(0);

    let col = create_column("col", DataType::String, false, ROW_COUNT, &mut rng);
    let col_opt = create_column("col_opt", DataType::String, true, ROW_COUNT, &mut rng);

    // Take owned copies of the expected values before the columns move into the frame.
    let required_values = col
        .str()
        .unwrap()
        .iter()
        .map(|value| value.unwrap().to_owned())
        .collect_vec();
    let optional_values = col_opt
        .str()
        .unwrap()
        .iter()
        .map(|value| value.map(str::to_owned))
        .collect_vec();

    let df = DataFrame::new(vec![col, col_opt]).unwrap();

    let expected_rows = izip!(required_values.iter(), optional_values.iter())
        .map(|(required, optional)| TestRow {
            col: required.as_str(),
            col_opt: optional.as_deref(),
        })
        .collect_vec();

    let rows = df
        .rows_iter::<TestRow>()
        .unwrap()
        .map(|row| row.unwrap())
        .collect_vec();

    assert_eq!(rows, expected_rows);
}
110
// Round-trips a required and a nullable `Categorical` column through `rows_iter` and
// checks that every row matches the strings read directly from the columns.
#[cfg(feature = "dtype-categorical")]
#[test]
fn cat_test() {
    // Row shape that borrows its string cells.
    #[derive(Debug, FromDataFrameRow, PartialEq)]
    struct TestRow<'a> {
        col: &'a str,
        col_opt: Option<&'a str>,
    }

    let mut rng = StdRng::seed_from_u64(0);
    let dtype = DataType::Categorical(None, CategoricalOrdering::Physical);

    let col = create_column("col", dtype.clone(), false, ROW_COUNT, &mut rng);
    let col_opt = create_column("col_opt", dtype, true, ROW_COUNT, &mut rng);

    // Take owned copies of the expected values before the columns move into the frame.
    let required_values = col
        .categorical()
        .unwrap()
        .iter_str()
        .map(|value| value.unwrap().to_owned())
        .collect_vec();
    let optional_values = col_opt
        .categorical()
        .unwrap()
        .iter_str()
        .map(|value| value.map(str::to_owned))
        .collect_vec();

    let df = DataFrame::new(vec![col, col_opt]).unwrap();

    let expected_rows = izip!(required_values.iter(), optional_values.iter())
        .map(|(required, optional)| TestRow {
            col: required.as_str(),
            col_opt: optional.as_deref(),
        })
        .collect_vec();

    let rows = df
        .rows_iter::<TestRow>()
        .unwrap()
        .map(|row| row.unwrap())
        .collect_vec();

    assert_eq!(rows, expected_rows);
}
156
// Round-trips a required and a nullable `Enum` column (categories "A" to "E") through
// `rows_iter` and checks that every row matches the strings read directly from the columns.
#[cfg(feature = "dtype-categorical")]
#[test]
fn enum_test() {
    // Row shape that borrows its string cells.
    #[derive(Debug, FromDataFrameRow, PartialEq)]
    struct TestRow<'a> {
        col: &'a str,
        col_opt: Option<&'a str>,
    }

    let mut rng = StdRng::seed_from_u64(0);

    // Build the enum dtype from the first chunk of a small string series.
    let enum_value_series = Series::new("enum".into(), &["A", "B", "C", "D", "E"]);
    let categories = enum_value_series
        .str()
        .unwrap()
        .downcast_iter()
        .next()
        .unwrap()
        .clone();
    let dtype = create_enum_dtype(categories);

    let col = create_column("col", dtype.clone(), false, ROW_COUNT, &mut rng);
    let col_opt = create_column("col_opt", dtype, true, ROW_COUNT, &mut rng);

    // Take owned copies of the expected values before the columns move into the frame.
    let required_values = col
        .categorical()
        .unwrap()
        .iter_str()
        .map(|value| value.unwrap().to_owned())
        .collect_vec();
    let optional_values = col_opt
        .categorical()
        .unwrap()
        .iter_str()
        .map(|value| value.map(str::to_owned))
        .collect_vec();

    let df = DataFrame::new(vec![col, col_opt]).unwrap();

    let expected_rows = izip!(required_values.iter(), optional_values.iter())
        .map(|(required, optional)| TestRow {
            col: required.as_str(),
            col_opt: optional.as_deref(),
        })
        .collect_vec();

    let rows = df
        .rows_iter::<TestRow>()
        .unwrap()
        .map(|row| row.unwrap())
        .collect_vec();

    assert_eq!(rows, expected_rows);
}
205}