polars_rows_iter/iter_from_column/
iter_from_column_str.rs1use super::*;
2use iter_from_column_trait::IterFromColumn;
3use polars::prelude::*;
4
5impl<'a> IterFromColumn<'a> for &'a str {
6 type RawInner = &'a str;
7 fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
8 create_iter(column)
9 }
10
11 #[inline]
12 fn get_value(polars_value: Option<&'a str>, column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
13 where
14 Self: Sized,
15 {
16 polars_value.ok_or_else(|| <&'a str as IterFromColumn<'a>>::unexpected_null_value_error(column_name))
17 }
18}
19
20impl<'a> IterFromColumn<'a> for Option<&'a str> {
21 type RawInner = &'a str;
22 fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
23 create_iter(column)
24 }
25
26 #[inline]
27 fn get_value(polars_value: Option<&'a str>, _column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
28 where
29 Self: Sized,
30 {
31 Ok(polars_value)
32 }
33}
34
35fn create_str_iter<'a>(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
36 Ok(Box::new(column.str()?.iter()))
37}
38
/// Boxes the string iterator (`iter_str`) of a categorical column.
///
/// NOTE(review): only the `cat32()` accessor is attempted here; categoricals
/// backed by another physical width presumably surface as an error from that
/// call — confirm against the polars version in use.
///
/// # Errors
/// Propagates the error returned by `Column::cat32`.
#[cfg(feature = "dtype-categorical")]
fn create_cat_iter<'a>(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
    let categorical = column.cat32()?;
    Ok(Box::new(categorical.iter_str()))
}
43
/// Boxes the string iterator (`iter_str`) of an enum column.
///
/// NOTE(review): only the `cat8()` accessor is attempted here; enums backed by
/// another physical width presumably surface as an error from that call —
/// confirm against the polars version in use.
///
/// # Errors
/// Propagates the error returned by `Column::cat8`.
#[cfg(feature = "dtype-categorical")]
fn create_enum_iter<'a>(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
    let enumeration = column.cat8()?;
    Ok(Box::new(enumeration.iter_str()))
}
48
49pub fn create_iter<'a>(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a str>> + 'a>> {
50 let iter = match column.dtype() {
51 DataType::String => create_str_iter(column)?,
52 #[cfg(feature = "dtype-categorical")]
53 DataType::Categorical(_, _) => create_cat_iter(column)?,
54 #[cfg(feature = "dtype-categorical")]
55 DataType::Enum(_, _) => create_enum_iter(column)?,
56 dtype => {
57 let column_name = column.name().as_str();
58 return Err(
59 polars_err!(SchemaMismatch: "Cannot get &str from column '{column_name}' with dtype '{dtype}'.\
60 Make sure to enable 'dtype-categorical' feature for 'Categorical' and 'Enum' dtypes."),
61 );
62 }
63 };
64
65 Ok(iter)
66}
67
68#[cfg(test)]
69mod tests {
70 use crate::*;
71 use itertools::{izip, Itertools};
72 use polars::prelude::*;
73 use rand::{rngs::StdRng, SeedableRng};
74 use shared_test_helpers::*;
75
76 const ROW_COUNT: usize = 64;
77
/// `rows_iter` over two `String` columns must yield the same values as reading
/// the columns directly through `str()`.
#[test]
fn str_rows_iter_test() {
    #[derive(Debug, FromDataFrameRow, PartialEq)]
    struct TestRow<'a> {
        col: &'a str,
        col_opt: Option<&'a str>,
    }

    let mut rng = StdRng::seed_from_u64(0);
    let dtype = DataType::String;

    // `col` is unwrapped below, so it is expected to contain no nulls;
    // `col_opt` keeps its `Option`s.
    let col = create_column("col", &dtype, false, ROW_COUNT, &mut rng);
    let col_opt = create_column("col_opt", &dtype, true, ROW_COUNT, &mut rng);

    // Take owned copies before the columns are moved into the data frame.
    let required = col
        .str()
        .unwrap()
        .iter()
        .map(|value| value.unwrap().to_string())
        .collect_vec();
    let optional = col_opt
        .str()
        .unwrap()
        .iter()
        .map(|value| value.map(str::to_owned))
        .collect_vec();

    let df = DataFrame::new(vec![col, col_opt]).unwrap();

    let expected_rows = izip!(&required, &optional)
        .map(|(text, maybe_text)| TestRow {
            col: text.as_str(),
            col_opt: maybe_text.as_deref(),
        })
        .collect_vec();

    let rows = df.rows_iter::<TestRow>().unwrap().map(Result::unwrap).collect_vec();

    assert_eq!(rows, expected_rows);
}
117
/// `scalar_iter` over the `String` column "col" must yield the same `&str`
/// values as reading the column directly through `str()`.
#[test]
fn str_scalar_iter_test() {
    let mut rng = StdRng::seed_from_u64(0);
    let dtype = DataType::String;

    let col = create_column("col", &dtype, false, ROW_COUNT, &mut rng);
    // Only present so the frame has the same two columns as the other tests.
    let col_opt = create_column("col_opt", &dtype, true, ROW_COUNT, &mut rng);

    // Owned copies of the expected values, taken before `col` is moved.
    let col_values = col
        .str()
        .unwrap()
        .iter()
        .map(|value| value.unwrap().to_string())
        .collect_vec();

    let df = DataFrame::new(vec![col, col_opt]).unwrap();

    let scalars = df.scalar_iter("col").unwrap();
    let values: Vec<&str> = scalars.collect::<PolarsResult<_>>().unwrap();

    assert_eq!(values, col_values);
}
139
/// `scalar_iter` over the `String` column "col_opt" must yield the same
/// `Option<&str>` values as reading the column directly through `str()`.
#[test]
fn str_scalar_iter_opt_test() {
    let mut rng = StdRng::seed_from_u64(0);
    let dtype = DataType::String;

    let col = create_column("col", &dtype, false, ROW_COUNT, &mut rng);
    let col_opt = create_column("col_opt", &dtype, true, ROW_COUNT, &mut rng);

    // Owned copies of the expected values, taken before `col_opt` is moved.
    let owned_expected = col_opt
        .str()
        .unwrap()
        .iter()
        .map(|value| value.map(str::to_owned))
        .collect_vec();

    let df = DataFrame::new(vec![col, col_opt]).unwrap();

    // Borrow the owned strings so both sides compare as `Option<&str>`.
    let expected = owned_expected.iter().map(|value| value.as_deref()).collect_vec();

    let scalars = df.scalar_iter("col_opt").unwrap();
    let values: Vec<Option<&str>> = scalars.collect::<PolarsResult<_>>().unwrap();

    assert_eq!(values, expected);
}
171
/// `rows_iter` over two categorical columns (u32 physical) must yield the same
/// strings as `cat32().iter_str()` on the columns themselves.
#[cfg(feature = "dtype-categorical")]
#[test]
fn cat_rows_iter_test() {
    #[derive(Debug, FromDataFrameRow, PartialEq)]
    struct TestRow<'a> {
        col: &'a str,
        col_opt: Option<&'a str>,
    }

    let mut rng = StdRng::seed_from_u64(0);

    let dtype = DataType::from_categories(Categories::new(
        PlSmallStr::EMPTY,
        PlSmallStr::EMPTY,
        CategoricalPhysical::U32,
    ));

    // `col` is unwrapped below, so it is expected to contain no nulls.
    let col = create_column("col", &dtype, false, ROW_COUNT, &mut rng);
    let col_opt = create_column("col_opt", &dtype, true, ROW_COUNT, &mut rng);

    // Take owned copies before the columns are moved into the data frame.
    let required = col
        .cat32()
        .unwrap()
        .iter_str()
        .map(|value| value.unwrap().to_string())
        .collect_vec();
    let optional = col_opt
        .cat32()
        .unwrap()
        .iter_str()
        .map(|value| value.map(str::to_owned))
        .collect_vec();

    let df = DataFrame::new(vec![col, col_opt]).unwrap();

    let expected_rows = izip!(&required, &optional)
        .map(|(text, maybe_text)| TestRow {
            col: text.as_str(),
            col_opt: maybe_text.as_deref(),
        })
        .collect_vec();

    let rows = df.rows_iter::<TestRow>().unwrap().map(Result::unwrap).collect_vec();

    assert_eq!(rows, expected_rows);
}
219
/// `scalar_iter` over the categorical column "col" must yield the same `&str`
/// values as `cat32().iter_str()` on the column itself.
#[cfg(feature = "dtype-categorical")]
#[test]
fn cat_scalar_iter_test() {
    // No function-local `use crate::DataframeRowsIterExt;` is needed: the
    // sibling tests call `scalar_iter` with only the module-level imports.
    let mut rng = StdRng::seed_from_u64(0);
    let height = ROW_COUNT;

    let cats = Categories::new(PlSmallStr::EMPTY, PlSmallStr::EMPTY, CategoricalPhysical::U32);
    let dtype = DataType::from_categories(cats);

    let col = create_column("col", &dtype, false, height, &mut rng);
    // Only present so the frame has the same two columns as the other tests.
    let col_opt = create_column("col_opt", &dtype, true, height, &mut rng);

    // Owned copies of the expected values, taken before `col` is moved.
    let col_values = col
        .cat32()
        .unwrap()
        .iter_str()
        .map(|v| v.unwrap().to_owned())
        .collect_vec();

    let df = DataFrame::new(vec![col, col_opt]).unwrap();

    let values = df
        .scalar_iter("col")
        .unwrap()
        .collect::<PolarsResult<Vec<&str>>>()
        .unwrap();

    assert_eq!(values, col_values)
}
251
/// `scalar_iter` over the categorical column "col_opt" must yield the same
/// `Option<&str>` values as `cat32().iter_str()` on the column itself.
///
/// Despite the `rows_iter` in its name, this test exercises `scalar_iter`; it
/// is the categorical counterpart of `str_scalar_iter_opt_test`.
#[cfg(feature = "dtype-categorical")]
#[test]
fn cat_rows_iter_opt_test() {
    let mut rng = StdRng::seed_from_u64(0);

    let dtype = DataType::from_categories(Categories::new(
        PlSmallStr::EMPTY,
        PlSmallStr::EMPTY,
        CategoricalPhysical::U32,
    ));

    let col = create_column("col", &dtype, false, ROW_COUNT, &mut rng);
    let col_opt = create_column("col_opt", &dtype, true, ROW_COUNT, &mut rng);

    // Owned copies of the expected values, taken before `col_opt` is moved.
    let owned_expected = col_opt
        .cat32()
        .unwrap()
        .iter_str()
        .map(|value| value.map(str::to_owned))
        .collect_vec();

    let df = DataFrame::new(vec![col, col_opt]).unwrap();

    // Borrow the owned strings so both sides compare as `Option<&str>`.
    let expected = owned_expected.iter().map(|value| value.as_deref()).collect_vec();

    let scalars = df.scalar_iter("col_opt").unwrap();
    let values: Vec<Option<&str>> = scalars.collect::<PolarsResult<_>>().unwrap();

    assert_eq!(values, expected);
}
286
/// `rows_iter` over two enum columns (categories "A".."E") must yield the same
/// strings as `cat8().iter_str()` on the columns themselves.
#[cfg(feature = "dtype-categorical")]
#[test]
fn enum_rows_iter_test() {
    #[derive(Debug, FromDataFrameRow, PartialEq)]
    struct TestRow<'a> {
        col: &'a str,
        col_opt: Option<&'a str>,
    }

    let mut rng = StdRng::seed_from_u64(0);

    let dtype = DataType::from_frozen_categories(FrozenCategories::new(["A", "B", "C", "D", "E"]).unwrap());

    // `col` is unwrapped below, so it is expected to contain no nulls.
    let col = create_column("col", &dtype, false, ROW_COUNT, &mut rng);
    let col_opt = create_column("col_opt", &dtype, true, ROW_COUNT, &mut rng);

    // Take owned copies before the columns are moved into the data frame.
    let required = col
        .cat8()
        .unwrap()
        .iter_str()
        .map(|value| value.unwrap().to_string())
        .collect_vec();
    let optional = col_opt
        .cat8()
        .unwrap()
        .iter_str()
        .map(|value| value.map(str::to_owned))
        .collect_vec();

    let df = DataFrame::new(vec![col, col_opt]).unwrap();

    let expected_rows = izip!(&required, &optional)
        .map(|(text, maybe_text)| TestRow {
            col: text.as_str(),
            col_opt: maybe_text.as_deref(),
        })
        .collect_vec();

    let rows = df.rows_iter::<TestRow>().unwrap().map(Result::unwrap).collect_vec();

    assert_eq!(rows, expected_rows);
}
334
/// `scalar_iter` over the enum column "col" must yield the same `&str` values
/// as `cat8().iter_str()` on the column itself.
#[cfg(feature = "dtype-categorical")]
#[test]
fn enum_scalar_iter_test() {
    let mut rng = StdRng::seed_from_u64(0);

    let dtype = DataType::from_frozen_categories(FrozenCategories::new(["A", "B", "C", "D", "E"]).unwrap());

    let col = create_column("col", &dtype, false, ROW_COUNT, &mut rng);
    // Only present so the frame has the same two columns as the other tests.
    let col_opt = create_column("col_opt", &dtype, true, ROW_COUNT, &mut rng);

    // Owned copies of the expected values, taken before `col` is moved.
    let col_values = col
        .cat8()
        .unwrap()
        .iter_str()
        .map(|value| value.unwrap().to_string())
        .collect_vec();

    let df = DataFrame::new(vec![col, col_opt]).unwrap();

    let scalars = df.scalar_iter("col").unwrap();
    let values: Vec<&str> = scalars.collect::<PolarsResult<_>>().unwrap();

    assert_eq!(values, col_values);
}
364
/// `scalar_iter` over the enum column "col_opt" must yield the same
/// `Option<&str>` values as `cat8().iter_str()` on the column itself.
#[cfg(feature = "dtype-categorical")]
#[test]
fn enum_scalar_iter_opt_test() {
    let mut rng = StdRng::seed_from_u64(0);

    let dtype = DataType::from_frozen_categories(FrozenCategories::new(["A", "B", "C", "D", "E"]).unwrap());

    let col = create_column("col", &dtype, false, ROW_COUNT, &mut rng);
    let col_opt = create_column("col_opt", &dtype, true, ROW_COUNT, &mut rng);

    // Owned copies of the expected values, taken before `col_opt` is moved.
    let owned_expected = col_opt
        .cat8()
        .unwrap()
        .iter_str()
        .map(|value| value.map(str::to_owned))
        .collect_vec();

    let df = DataFrame::new(vec![col, col_opt]).unwrap();

    // Borrow the owned strings so both sides compare as `Option<&str>`.
    let expected = owned_expected.iter().map(|value| value.as_deref()).collect_vec();

    let scalars = df.scalar_iter("col_opt").unwrap();
    let values: Vec<Option<&str>> = scalars.collect::<PolarsResult<_>>().unwrap();

    assert_eq!(values, expected);
}
398 }
399}