polars_expr/idx_table/
mod.rs

1use std::any::Any;
2
3use polars_core::prelude::*;
4use polars_utils::IdxSize;
5
6use crate::hash_keys::HashKeys;
7
8mod binview;
9mod row_encoded;
10mod single_key;
11
12pub trait IdxTable: Any + Send + Sync {
13    /// Creates a new empty IdxTable similar to this one.
14    fn new_empty(&self) -> Box<dyn IdxTable>;
15
16    /// Reserves space for the given number additional keys.
17    fn reserve(&mut self, additional: usize);
18
19    /// Returns the number of unique keys in this IdxTable.
20    fn num_keys(&self) -> IdxSize;
21
22    /// Inserts the given keys into this IdxTable.
23    fn insert_keys(&mut self, keys: &HashKeys, track_unmatchable: bool);
24
25    /// Inserts a subset of the given keys into this IdxTable.
26    /// # Safety
27    /// The provided subset indices must be in-bounds.
28    unsafe fn insert_keys_subset(
29        &mut self,
30        keys: &HashKeys,
31        subset: &[IdxSize],
32        track_unmatchable: bool,
33    );
34
35    /// Probe the table, adding an entry to table_match and probe_match for each
36    /// match. Will stop processing new keys once limit matches have been
37    /// generated, returning the number of keys processed.
38    ///
39    /// If mark_matches is true, matches are marked in the table as such.
40    ///
41    /// If emit_unmatched is true, for keys that do not have a match we emit a
42    /// match with ChunkId::null() on the table match.
43    fn probe(
44        &self,
45        hash_keys: &HashKeys,
46        table_match: &mut Vec<IdxSize>,
47        probe_match: &mut Vec<IdxSize>,
48        mark_matches: bool,
49        emit_unmatched: bool,
50        limit: IdxSize,
51    ) -> IdxSize;
52
53    /// The same as probe, except it will only apply to the specified subset of keys.
54    /// # Safety
55    /// The provided subset indices must be in-bounds.
56    #[allow(clippy::too_many_arguments)]
57    unsafe fn probe_subset(
58        &self,
59        hash_keys: &HashKeys,
60        subset: &[IdxSize],
61        table_match: &mut Vec<IdxSize>,
62        probe_match: &mut Vec<IdxSize>,
63        mark_matches: bool,
64        emit_unmatched: bool,
65        limit: IdxSize,
66    ) -> IdxSize;
67
68    /// Get the ChunkIds for each key which was never marked during probing.
69    fn unmarked_keys(&self, out: &mut Vec<IdxSize>, offset: IdxSize, limit: IdxSize) -> IdxSize;
70}
71
72pub fn new_idx_table(key_schema: Arc<Schema>) -> Box<dyn IdxTable> {
73    if key_schema.len() > 1 {
74        Box::new(row_encoded::RowEncodedIdxTable::new())
75    } else {
76        use single_key::SingleKeyIdxTable as SKIT;
77        match key_schema.get_at_index(0).unwrap().1 {
78            #[cfg(feature = "dtype-u8")]
79            DataType::UInt8 => Box::new(SKIT::<UInt8Type>::new()),
80            #[cfg(feature = "dtype-u16")]
81            DataType::UInt16 => Box::new(SKIT::<UInt16Type>::new()),
82            DataType::UInt32 => Box::new(SKIT::<UInt32Type>::new()),
83            DataType::UInt64 => Box::new(SKIT::<UInt64Type>::new()),
84            #[cfg(feature = "dtype-u128")]
85            DataType::UInt128 => Box::new(SKIT::<UInt128Type>::new()),
86            #[cfg(feature = "dtype-i8")]
87            DataType::Int8 => Box::new(SKIT::<Int8Type>::new()),
88            #[cfg(feature = "dtype-i16")]
89            DataType::Int16 => Box::new(SKIT::<Int16Type>::new()),
90            DataType::Int32 => Box::new(SKIT::<Int32Type>::new()),
91            DataType::Int64 => Box::new(SKIT::<Int64Type>::new()),
92            #[cfg(feature = "dtype-i128")]
93            DataType::Int128 => Box::new(SKIT::<Int128Type>::new()),
94            DataType::Float32 => Box::new(SKIT::<Float32Type>::new()),
95            DataType::Float64 => Box::new(SKIT::<Float64Type>::new()),
96
97            #[cfg(feature = "dtype-date")]
98            DataType::Date => Box::new(SKIT::<Int32Type>::new()),
99            #[cfg(feature = "dtype-datetime")]
100            DataType::Datetime(_, _) => Box::new(SKIT::<Int64Type>::new()),
101            #[cfg(feature = "dtype-duration")]
102            DataType::Duration(_) => Box::new(SKIT::<Int64Type>::new()),
103            #[cfg(feature = "dtype-time")]
104            DataType::Time => Box::new(SKIT::<Int64Type>::new()),
105
106            #[cfg(feature = "dtype-decimal")]
107            DataType::Decimal(_, _) => Box::new(SKIT::<Int128Type>::new()),
108            #[cfg(feature = "dtype-categorical")]
109            dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => {
110                with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
111                    Box::new(SKIT::<<$C as PolarsCategoricalType>::PolarsPhysical>::new())
112                })
113            },
114
115            DataType::String | DataType::Binary => Box::new(binview::BinviewKeyIdxTable::new()),
116
117            _ => Box::new(row_encoded::RowEncodedIdxTable::new()),
118        }
119    }
120}