polars_expr/hot_groups/
mod.rs1use std::any::Any;
2
3use polars_core::prelude::*;
4use polars_utils::IdxSize;
5
6use crate::EvictIdx;
7use crate::hash_keys::HashKeys;
8
9mod binview;
10mod fixed_index_table;
11mod row_encoded;
12mod single_key;
13
14pub trait HotGrouper: Any + Send + Sync {
18 fn new_empty(&self, groups: usize) -> Box<dyn HotGrouper>;
20
21 fn num_groups(&self) -> IdxSize;
23
24 fn insert_keys(
27 &mut self,
28 keys: &HashKeys,
29 hot_idxs: &mut Vec<IdxSize>,
30 hot_group_idxs: &mut Vec<EvictIdx>,
31 cold_idxs: &mut Vec<IdxSize>,
32 force_hot: bool,
33 );
34
35 fn keys(&self) -> HashKeys;
37
38 fn num_evictions(&self) -> usize;
40
41 fn take_evicted_keys(&mut self) -> HashKeys;
43
44 fn as_any(&self) -> &dyn Any;
45}
46
47pub fn new_hash_hot_grouper(key_schema: Arc<Schema>, num_groups: usize) -> Box<dyn HotGrouper> {
48 if key_schema.len() > 1 {
49 Box::new(row_encoded::RowEncodedHashHotGrouper::new(
50 key_schema, num_groups,
51 ))
52 } else {
53 use single_key::SingleKeyHashHotGrouper as SK;
54 let dt = key_schema.get_at_index(0).unwrap().1.clone();
55 let ng = num_groups;
56 match dt {
57 #[cfg(feature = "dtype-u8")]
58 DataType::UInt8 => Box::new(SK::<UInt8Type>::new(dt, ng)),
59 #[cfg(feature = "dtype-u16")]
60 DataType::UInt16 => Box::new(SK::<UInt16Type>::new(dt, ng)),
61 DataType::UInt32 => Box::new(SK::<UInt32Type>::new(dt, ng)),
62 DataType::UInt64 => Box::new(SK::<UInt64Type>::new(dt, ng)),
63 #[cfg(feature = "dtype-u128")]
64 DataType::UInt128 => Box::new(SK::<UInt128Type>::new(dt, ng)),
65 #[cfg(feature = "dtype-i8")]
66 DataType::Int8 => Box::new(SK::<Int8Type>::new(dt, ng)),
67 #[cfg(feature = "dtype-i16")]
68 DataType::Int16 => Box::new(SK::<Int16Type>::new(dt, ng)),
69 DataType::Int32 => Box::new(SK::<Int32Type>::new(dt, ng)),
70 DataType::Int64 => Box::new(SK::<Int64Type>::new(dt, ng)),
71 #[cfg(feature = "dtype-i128")]
72 DataType::Int128 => Box::new(SK::<Int128Type>::new(dt, ng)),
73 #[cfg(feature = "dtype-f16")]
74 DataType::Float16 => Box::new(SK::<Float16Type>::new(dt, ng)),
75 DataType::Float32 => Box::new(SK::<Float32Type>::new(dt, ng)),
76 DataType::Float64 => Box::new(SK::<Float64Type>::new(dt, ng)),
77
78 #[cfg(feature = "dtype-date")]
79 DataType::Date => Box::new(SK::<Int32Type>::new(dt, ng)),
80 #[cfg(feature = "dtype-datetime")]
81 DataType::Datetime(_, _) => Box::new(SK::<Int64Type>::new(dt, ng)),
82 #[cfg(feature = "dtype-duration")]
83 DataType::Duration(_) => Box::new(SK::<Int64Type>::new(dt, ng)),
84 #[cfg(feature = "dtype-time")]
85 DataType::Time => Box::new(SK::<Int64Type>::new(dt, ng)),
86
87 #[cfg(feature = "dtype-decimal")]
88 DataType::Decimal(_, _) => Box::new(SK::<Int128Type>::new(dt, ng)),
89 #[cfg(feature = "dtype-categorical")]
90 dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => {
91 with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
92 Box::new(SK::<<$C as PolarsCategoricalType>::PolarsPhysical>::new(dt.clone(), ng))
93 })
94 },
95
96 DataType::String | DataType::Binary => {
97 Box::new(binview::BinviewHashHotGrouper::new(ng))
98 },
99
100 _ => Box::new(row_encoded::RowEncodedHashHotGrouper::new(
101 key_schema, num_groups,
102 )),
103 }
104 }
105}