// polars_parquet/arrow/read/expr.rs

use std::sync::Arc;

use arrow::array::Array;
use arrow::bitmap::{Bitmap, BitmapBuilder};
use arrow::types::AlignedBytes;

/// A single constant value, tagged with the type it carries.
///
/// Values of this type are the operands stored inside
/// [`SpecializedParquetColumnExpr`] variants.
#[derive(Clone)]
pub enum ParquetScalar {
    /// The null value; carries no payload.
    Null,

    /// A boolean value.
    Boolean(bool),

    /// A signed 8-bit integer.
    Int8(i8),
    /// A signed 16-bit integer.
    Int16(i16),
    /// A signed 32-bit integer.
    Int32(i32),
    /// A signed 64-bit integer.
    Int64(i64),
    /// An unsigned 8-bit integer.
    UInt8(u8),
    /// An unsigned 16-bit integer.
    UInt16(u16),
    /// An unsigned 32-bit integer.
    UInt32(u32),
    /// An unsigned 64-bit integer.
    UInt64(u64),

    /// A 32-bit floating point number.
    Float32(f32),
    /// A 64-bit floating point number.
    Float64(f64),

    /// Owned bytes of a fixed-size binary value. The width is whatever the
    /// slice length is; nothing in this type enforces it.
    FixedSizeBinary(Box<[u8]>),

    /// An owned UTF-8 string.
    String(Box<str>),
    /// Owned bytes of a variable-length binary value.
    Binary(Box<[u8]>),
}

31impl ParquetScalar {
32    pub(crate) fn is_null(&self) -> bool {
33        matches!(self, Self::Null)
34    }
35
36    pub(crate) fn to_aligned_bytes<B: AlignedBytes>(&self) -> Option<B> {
37        match self {
38            Self::Int8(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
39                .ok()
40                .map(B::from_unaligned),
41            Self::Int16(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
42                .ok()
43                .map(B::from_unaligned),
44            Self::Int32(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
45                .ok()
46                .map(B::from_unaligned),
47            Self::Int64(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
48                .ok()
49                .map(B::from_unaligned),
50            Self::UInt8(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
51                .ok()
52                .map(B::from_unaligned),
53            Self::UInt16(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
54                .ok()
55                .map(B::from_unaligned),
56            Self::UInt32(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
57                .ok()
58                .map(B::from_unaligned),
59            Self::UInt64(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
60                .ok()
61                .map(B::from_unaligned),
62            Self::Float32(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
63                .ok()
64                .map(B::from_unaligned),
65            Self::Float64(v) => <B::Unaligned>::try_from(&v.to_le_bytes())
66                .ok()
67                .map(B::from_unaligned),
68            _ => None,
69        }
70    }
71
72    pub(crate) fn as_str(&self) -> Option<&str> {
73        match self {
74            Self::String(s) => Some(s.as_ref()),
75            _ => None,
76        }
77    }
78
79    pub(crate) fn as_binary(&self) -> Option<&[u8]> {
80        match self {
81            Self::Binary(s) => Some(s.as_ref()),
82            _ => None,
83        }
84    }
85
86    pub(crate) fn as_bool(&self) -> Option<bool> {
87        match self {
88            Self::Boolean(s) => Some(*s),
89            _ => None,
90        }
91    }
92}
93
94#[derive(Clone)]
95pub enum SpecializedParquetColumnExpr {
96    Equal(ParquetScalar),
97    Between(ParquetScalar, ParquetScalar),
98    EqualOneOf(Box<[ParquetScalar]>),
99    StartsWith(Box<[u8]>),
100    EndsWith(Box<[u8]>),
101    RegexMatch(regex::bytes::Regex),
102}
103
104pub type ParquetColumnExprRef = Arc<dyn ParquetColumnExpr>;
105pub trait ParquetColumnExpr: Send + Sync {
106    fn evaluate(&self, values: &dyn Array) -> Bitmap {
107        let mut bm = BitmapBuilder::new();
108        self.evaluate_mut(values, &mut bm);
109        bm.freeze()
110    }
111    fn evaluate_mut(&self, values: &dyn Array, bm: &mut BitmapBuilder);
112    fn evaluate_null(&self) -> bool;
113
114    fn as_specialized(&self) -> Option<&SpecializedParquetColumnExpr>;
115}