polars_expr/idx_table/
mod.rs1use std::any::Any;
2
3use polars_core::prelude::*;
4use polars_utils::IdxSize;
5
6use crate::hash_keys::HashKeys;
7
8mod binview;
9mod row_encoded;
10mod single_key;
11
12pub trait IdxTable: Any + Send + Sync {
13 fn new_empty(&self) -> Box<dyn IdxTable>;
15
16 fn reserve(&mut self, additional: usize);
18
19 fn num_keys(&self) -> IdxSize;
21
22 fn insert_keys(&mut self, keys: &HashKeys, track_unmatchable: bool);
24
25 unsafe fn insert_keys_subset(
29 &mut self,
30 keys: &HashKeys,
31 subset: &[IdxSize],
32 track_unmatchable: bool,
33 );
34
35 fn probe(
44 &self,
45 hash_keys: &HashKeys,
46 table_match: &mut Vec<IdxSize>,
47 probe_match: &mut Vec<IdxSize>,
48 mark_matches: bool,
49 emit_unmatched: bool,
50 limit: IdxSize,
51 ) -> IdxSize;
52
53 #[allow(clippy::too_many_arguments)]
57 unsafe fn probe_subset(
58 &self,
59 hash_keys: &HashKeys,
60 subset: &[IdxSize],
61 table_match: &mut Vec<IdxSize>,
62 probe_match: &mut Vec<IdxSize>,
63 mark_matches: bool,
64 emit_unmatched: bool,
65 limit: IdxSize,
66 ) -> IdxSize;
67
68 fn unmarked_keys(&self, out: &mut Vec<IdxSize>, offset: IdxSize, limit: IdxSize) -> IdxSize;
70}
71
72pub fn new_idx_table(key_schema: Arc<Schema>) -> Box<dyn IdxTable> {
73 if key_schema.len() > 1 {
74 Box::new(row_encoded::RowEncodedIdxTable::new())
75 } else {
76 use single_key::SingleKeyIdxTable as SKIT;
77 match key_schema.get_at_index(0).unwrap().1 {
78 #[cfg(feature = "dtype-u8")]
79 DataType::UInt8 => Box::new(SKIT::<UInt8Type>::new()),
80 #[cfg(feature = "dtype-u16")]
81 DataType::UInt16 => Box::new(SKIT::<UInt16Type>::new()),
82 DataType::UInt32 => Box::new(SKIT::<UInt32Type>::new()),
83 DataType::UInt64 => Box::new(SKIT::<UInt64Type>::new()),
84 #[cfg(feature = "dtype-u128")]
85 DataType::UInt128 => Box::new(SKIT::<UInt128Type>::new()),
86 #[cfg(feature = "dtype-i8")]
87 DataType::Int8 => Box::new(SKIT::<Int8Type>::new()),
88 #[cfg(feature = "dtype-i16")]
89 DataType::Int16 => Box::new(SKIT::<Int16Type>::new()),
90 DataType::Int32 => Box::new(SKIT::<Int32Type>::new()),
91 DataType::Int64 => Box::new(SKIT::<Int64Type>::new()),
92 #[cfg(feature = "dtype-i128")]
93 DataType::Int128 => Box::new(SKIT::<Int128Type>::new()),
94 DataType::Float32 => Box::new(SKIT::<Float32Type>::new()),
95 DataType::Float64 => Box::new(SKIT::<Float64Type>::new()),
96
97 #[cfg(feature = "dtype-date")]
98 DataType::Date => Box::new(SKIT::<Int32Type>::new()),
99 #[cfg(feature = "dtype-datetime")]
100 DataType::Datetime(_, _) => Box::new(SKIT::<Int64Type>::new()),
101 #[cfg(feature = "dtype-duration")]
102 DataType::Duration(_) => Box::new(SKIT::<Int64Type>::new()),
103 #[cfg(feature = "dtype-time")]
104 DataType::Time => Box::new(SKIT::<Int64Type>::new()),
105
106 #[cfg(feature = "dtype-decimal")]
107 DataType::Decimal(_, _) => Box::new(SKIT::<Int128Type>::new()),
108 #[cfg(feature = "dtype-categorical")]
109 dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => {
110 with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
111 Box::new(SKIT::<<$C as PolarsCategoricalType>::PolarsPhysical>::new())
112 })
113 },
114
115 DataType::String | DataType::Binary => Box::new(binview::BinviewKeyIdxTable::new()),
116
117 _ => Box::new(row_encoded::RowEncodedIdxTable::new()),
118 }
119 }
120}