polars_rows_iter/iter_from_column/
iter_from_column_binary.rs

1use crate::*;
2use polars::prelude::*;
3
4impl<'a> IterFromColumn<'a> for &'a [u8] {
5    type RawInner = &'a [u8];
6    fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a [u8]>> + 'a>> {
7        create_iter(column)
8    }
9
10    #[inline]
11    fn get_value(polars_value: Option<&'a [u8]>, column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
12    where
13        Self: Sized,
14    {
15        polars_value.ok_or_else(|| <&[u8] as IterFromColumn<'a>>::unexpected_null_value_error(column_name))
16    }
17}
18
19impl<'a> IterFromColumn<'a> for Option<&'a [u8]> {
20    type RawInner = &'a [u8];
21    fn create_iter(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a [u8]>> + 'a>> {
22        create_iter(column)
23    }
24
25    #[inline]
26    fn get_value(polars_value: Option<&'a [u8]>, _column_name: &str, _dtype: &DataType) -> PolarsResult<Self>
27    where
28        Self: Sized,
29    {
30        Ok(polars_value)
31    }
32}
33
34fn create_iter<'a>(column: &'a Column) -> PolarsResult<Box<dyn Iterator<Item = Option<&'a [u8]>> + 'a>> {
35    let column_name = column.name().as_str();
36    let iter: Box<dyn Iterator<Item = Option<&[u8]>>> = match column.dtype() {
37        DataType::Binary => Box::new(column.binary()?.iter()),
38        DataType::BinaryOffset => Box::new(column.binary_offset()?.iter()),
39        dtype => {
40            return Err(
41                polars_err!(SchemaMismatch: "Cannot get &[u8] from column '{column_name}' with dtype : {dtype}"),
42            )
43        }
44    };
45
46    Ok(iter)
47}
48
#[cfg(test)]
mod tests {
    use super::*;
    use itertools::{izip, Itertools};
    use rand::{rngs::StdRng, SeedableRng};
    use shared_test_helpers::*;

    /// Number of rows generated for every test column.
    const ROW_COUNT: usize = 64;

    /// Rows read from a `Binary` frame must equal the values read directly
    /// from the underlying columns.
    #[test]
    fn binary_test() {
        #[derive(Debug, FromDataFrameRow, PartialEq)]
        struct TestRow<'a> {
            col: &'a [u8],
            col_opt: Option<&'a [u8]>,
        }

        let mut rng = StdRng::seed_from_u64(0);
        let required = create_column("col", DataType::Binary, false, ROW_COUNT, &mut rng);
        let nullable = create_column("col_opt", DataType::Binary, true, ROW_COUNT, &mut rng);

        // Clones are kept so the expected slices can still be borrowed after
        // the generated columns have been moved into the data frame.
        let required_copy = required.clone();
        let nullable_copy = nullable.clone();
        let df = DataFrame::new(vec![required, nullable]).unwrap();

        let required_values = required_copy
            .binary()
            .unwrap()
            .iter()
            .map(|cell| cell.unwrap())
            .collect_vec();
        let nullable_values = nullable_copy.binary().unwrap().iter().collect_vec();

        let expected_rows = izip!(&required_values, &nullable_values)
            .map(|(&col, &col_opt)| TestRow { col, col_opt })
            .collect_vec();

        let rows = df
            .rows_iter::<TestRow>()
            .unwrap()
            .map(|row| row.unwrap())
            .collect_vec();

        assert_eq!(rows, expected_rows)
    }

    /// Same check as `binary_test`, for the `BinaryOffset` dtype.
    #[test]
    fn binary_offset_test() {
        #[derive(Debug, FromDataFrameRow, PartialEq)]
        struct TestRow<'a> {
            col: &'a [u8],
            col_opt: Option<&'a [u8]>,
        }

        let mut rng = StdRng::seed_from_u64(0);
        let required = create_column("col", DataType::BinaryOffset, false, ROW_COUNT, &mut rng);
        let nullable = create_column("col_opt", DataType::BinaryOffset, true, ROW_COUNT, &mut rng);

        // Clones are kept so the expected slices can still be borrowed after
        // the generated columns have been moved into the data frame.
        let required_copy = required.clone();
        let nullable_copy = nullable.clone();
        let df = DataFrame::new(vec![required, nullable]).unwrap();

        let required_values = required_copy
            .binary_offset()
            .unwrap()
            .iter()
            .map(|cell| cell.unwrap())
            .collect_vec();
        let nullable_values = nullable_copy.binary_offset().unwrap().iter().collect_vec();

        let expected_rows = izip!(&required_values, &nullable_values)
            .map(|(&col, &col_opt)| TestRow { col, col_opt })
            .collect_vec();

        let rows = df
            .rows_iter::<TestRow>()
            .unwrap()
            .map(|row| row.unwrap())
            .collect_vec();

        assert_eq!(rows, expected_rows)
    }
}