polars_rows_iter/iter_from_column/
iter_from_column_string.rs1use super::iter_from_column_str::create_iter;
2use super::*;
3use iter_from_column_trait::IterFromColumn;
4use polars::prelude::*;
5
6impl<'a> IterFromColumn<'a> for String {
7 type RawInner = &'a str;
8 fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
9 create_iter(column)
10 }
11
12 #[inline]
13 fn get_value(polars_value: Option<&'a str>, column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
14 where
15 Self: Sized,
16 {
17 Ok(polars_value
18 .ok_or_else(|| <&'a str as IterFromColumn<'a>>::unexpected_null_value_error(column_name))?
19 .to_string())
20 }
21}
22
23impl<'a> IterFromColumn<'a> for Option<String> {
24 type RawInner = &'a str;
25 fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
26 create_iter(column)
27 }
28
29 #[inline]
30 fn get_value(polars_value: Option<&'a str>, _column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
31 where
32 Self: Sized,
33 {
34 Ok(polars_value.map(|s| s.to_string()))
35 }
36}
37
#[cfg(test)]
mod tests {
    use crate::*;
    use itertools::{izip, Itertools};
    use polars::prelude::*;
    use rand::{rngs::StdRng, SeedableRng};
    use shared_test_helpers::*;

    /// Number of rows generated for every test column.
    const ROW_COUNT: usize = 64;

    /// Row shape used by the `*_rows_iter_test` cases: one required and one
    /// optional string field, named after the `col` / `col_opt` test columns.
    #[derive(Debug, FromDataFrameRow, PartialEq)]
    struct TestRow {
        col: String,
        col_opt: Option<String>,
    }

    /// Creates the `col` / `col_opt` column pair of the given dtype.
    ///
    /// Both columns are drawn, in this order, from one RNG seeded with `0`, so
    /// every test sees the same data. `col` is created with the flag `false`
    /// and `col_opt` with `true`; the tests unwrap every value of `col`, so it
    /// is expected to contain no nulls.
    fn create_test_columns(dtype: &DataType) -> (Column, Column) {
        let mut rng = StdRng::seed_from_u64(0);
        let col = create_column("col", dtype, false, ROW_COUNT, &mut rng);
        let col_opt = create_column("col_opt", dtype, true, ROW_COUNT, &mut rng);
        (col, col_opt)
    }

    /// Dtype used by the `cat_*` tests: categories with a `U32` physical type.
    #[cfg(feature = "dtype-categorical")]
    fn categorical_dtype() -> DataType {
        let cats = Categories::new(PlSmallStr::EMPTY, PlSmallStr::EMPTY, CategoricalPhysical::U32);
        DataType::from_categories(cats)
    }

    /// Dtype used by the `enum_*` tests: frozen categories `A` through `E`.
    #[cfg(feature = "dtype-categorical")]
    fn enum_dtype() -> DataType {
        let categories = FrozenCategories::new(["A", "B", "C", "D", "E"]).unwrap();
        DataType::from_frozen_categories(categories)
    }

    /// `rows_iter` over string columns yields the same values as the columns.
    #[test]
    fn str_rows_iter_test() {
        let (col, col_opt) = create_test_columns(&DataType::String);

        let col_values = col.str().unwrap().iter().map(|v| v.unwrap().to_owned()).collect_vec();
        let col_opt_values = col_opt
            .str()
            .unwrap()
            .iter()
            .map(|v| v.map(|s| s.to_owned()))
            .collect_vec();

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        let expected_rows = izip!(col_values, col_opt_values)
            .map(|(col, col_opt)| TestRow { col, col_opt })
            .collect_vec();

        let rows = df.rows_iter::<TestRow>().unwrap().map(|v| v.unwrap()).collect_vec();

        assert_eq!(rows, expected_rows);
    }

    /// `scalar_iter` reads the required string column as `String`.
    #[test]
    fn str_scalar_iter_test() {
        let (col, col_opt) = create_test_columns(&DataType::String);

        let col_values = col.str().unwrap().iter().map(|v| v.unwrap().to_owned()).collect_vec();

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        let values = df
            .scalar_iter("col")
            .unwrap()
            .collect::<PolarsResult<Vec<String>>>()
            .unwrap();

        assert_eq!(values, col_values);
    }

    /// `scalar_iter` reads the optional string column as `Option<String>`.
    #[test]
    fn str_scalar_iter_opt_test() {
        let (col, col_opt) = create_test_columns(&DataType::String);

        let col_opt_values = col_opt
            .str()
            .unwrap()
            .iter()
            .map(|v| v.map(|s| s.to_owned()))
            .collect_vec();

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        let values = df
            .scalar_iter("col_opt")
            .unwrap()
            .collect::<PolarsResult<Vec<Option<String>>>>()
            .unwrap();

        assert_eq!(values, col_opt_values);
    }

    /// `rows_iter` over categorical columns yields the category strings.
    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn cat_rows_iter_test() {
        let (col, col_opt) = create_test_columns(&categorical_dtype());

        let col_values = col
            .cat32()
            .unwrap()
            .iter_str()
            .map(|v| v.unwrap().to_owned())
            .collect_vec();
        let col_opt_values = col_opt
            .cat32()
            .unwrap()
            .iter_str()
            .map(|v| v.map(|s| s.to_owned()))
            .collect_vec();

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        let expected_rows = izip!(col_values, col_opt_values)
            .map(|(col, col_opt)| TestRow { col, col_opt })
            .collect_vec();

        let rows = df.rows_iter::<TestRow>().unwrap().map(|v| v.unwrap()).collect_vec();

        assert_eq!(rows, expected_rows);
    }

    /// `scalar_iter` reads the required categorical column as `String`.
    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn cat_scalar_iter_test() {
        let (col, col_opt) = create_test_columns(&categorical_dtype());

        let col_values = col
            .cat32()
            .unwrap()
            .iter_str()
            .map(|v| v.unwrap().to_owned())
            .collect_vec();

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        let values = df
            .scalar_iter("col")
            .unwrap()
            .collect::<PolarsResult<Vec<String>>>()
            .unwrap();

        assert_eq!(values, col_values);
    }

    /// `scalar_iter` reads the optional categorical column as `Option<String>`.
    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn cat_scalar_iter_opt_test() {
        let (col, col_opt) = create_test_columns(&categorical_dtype());

        let col_opt_values = col_opt
            .cat32()
            .unwrap()
            .iter_str()
            .map(|v| v.map(|s| s.to_owned()))
            .collect_vec();

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        let values = df
            .scalar_iter("col_opt")
            .unwrap()
            .collect::<PolarsResult<Vec<Option<String>>>>()
            .unwrap();

        assert_eq!(values, col_opt_values);
    }

    /// `rows_iter` over enum columns yields the category strings.
    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn enum_rows_iter_test() {
        let (col, col_opt) = create_test_columns(&enum_dtype());

        let col_values = col
            .cat8()
            .unwrap()
            .iter_str()
            .map(|v| v.unwrap().to_owned())
            .collect_vec();
        let col_opt_values = col_opt
            .cat8()
            .unwrap()
            .iter_str()
            .map(|v| v.map(|s| s.to_owned()))
            .collect_vec();

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        let expected_rows = izip!(col_values, col_opt_values)
            .map(|(col, col_opt)| TestRow { col, col_opt })
            .collect_vec();

        let rows = df.rows_iter::<TestRow>().unwrap().map(|v| v.unwrap()).collect_vec();

        assert_eq!(rows, expected_rows);
    }

    /// `scalar_iter` reads the required enum column as `String`.
    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn enum_scalar_iter_test() {
        let (col, col_opt) = create_test_columns(&enum_dtype());

        let col_values = col
            .cat8()
            .unwrap()
            .iter_str()
            .map(|v| v.unwrap().to_owned())
            .collect_vec();

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        let values = df
            .scalar_iter("col")
            .unwrap()
            .collect::<PolarsResult<Vec<String>>>()
            .unwrap();

        assert_eq!(values, col_values);
    }

    /// `scalar_iter` reads the optional enum column as `Option<String>`.
    #[cfg(feature = "dtype-categorical")]
    #[test]
    fn enum_scalar_iter_opt_test() {
        let (col, col_opt) = create_test_columns(&enum_dtype());

        let col_opt_values = col_opt
            .cat8()
            .unwrap()
            .iter_str()
            .map(|v| v.map(|s| s.to_owned()))
            .collect_vec();

        let df = DataFrame::new(vec![col, col_opt]).unwrap();

        let values = df
            .scalar_iter("col_opt")
            .unwrap()
            .collect::<PolarsResult<Vec<Option<String>>>>()
            .unwrap();

        assert_eq!(values, col_opt_values);
    }
}