Skip to main content

polars_expr/hot_groups/
mod.rs

1use std::any::Any;
2
3use polars_core::prelude::*;
4use polars_utils::IdxSize;
5
6use crate::EvictIdx;
7use crate::hash_keys::HashKeys;
8
9mod binview;
10mod fixed_index_table;
11mod row_encoded;
12mod single_key;
13
/// A HotGrouper maps keys to groups, such that duplicate keys map to the same
/// group. Unlike a Grouper it has a fixed size and will cause evictions rather
/// than growing.
///
/// Implementors must be `Send + Sync`, and the `Any` supertrait together with
/// [`HotGrouper::as_any`] exposes the value as `&dyn Any`.
pub trait HotGrouper: Any + Send + Sync {
    /// Creates a new empty HotGrouper similar to this one, with the given size.
    fn new_empty(&self, groups: usize) -> Box<dyn HotGrouper>;

    /// Returns the number of groups in this HotGrouper.
    fn num_groups(&self) -> IdxSize;

    /// Inserts the given keys into this HotGrouper.
    ///
    /// Results are written into the caller-provided vectors `hot_idxs`,
    /// `hot_group_idxs` and `cold_idxs`.
    //
    // NOTE(review): the previous wording mentioned a `groups_idxs` argument,
    // which does not exist in this signature. Presumably `hot_idxs` receives
    // the positions in `keys` that were placed in a hot group,
    // `hot_group_idxs` the matching group (eviction) index for each of those,
    // `cold_idxs` the positions of keys that were not placed, and `force_hot`
    // makes every key be treated as hot — TODO confirm against the
    // implementations in the submodules.
    fn insert_keys(
        &mut self,
        keys: &HashKeys,
        hot_idxs: &mut Vec<IdxSize>,
        hot_group_idxs: &mut Vec<EvictIdx>,
        cold_idxs: &mut Vec<IdxSize>,
        force_hot: bool,
    );

    /// Get all the current hot keys, in group order.
    fn keys(&self) -> HashKeys;

    /// Get the number of evicted keys stored.
    fn num_evictions(&self) -> usize;

    /// Consume all the evicted keys from this HotGrouper.
    fn take_evicted_keys(&mut self) -> HashKeys;

    /// Returns this HotGrouper as a `&dyn Any`, e.g. so that a caller holding
    /// a `dyn HotGrouper` can downcast to the concrete implementation.
    fn as_any(&self) -> &dyn Any;
}
46
47pub fn new_hash_hot_grouper(key_schema: Arc<Schema>, num_groups: usize) -> Box<dyn HotGrouper> {
48    if key_schema.len() > 1 {
49        Box::new(row_encoded::RowEncodedHashHotGrouper::new(
50            key_schema, num_groups,
51        ))
52    } else {
53        use single_key::SingleKeyHashHotGrouper as SK;
54        let dt = key_schema.get_at_index(0).unwrap().1.clone();
55        let ng = num_groups;
56        match dt {
57            #[cfg(feature = "dtype-u8")]
58            DataType::UInt8 => Box::new(SK::<UInt8Type>::new(dt, ng)),
59            #[cfg(feature = "dtype-u16")]
60            DataType::UInt16 => Box::new(SK::<UInt16Type>::new(dt, ng)),
61            DataType::UInt32 => Box::new(SK::<UInt32Type>::new(dt, ng)),
62            DataType::UInt64 => Box::new(SK::<UInt64Type>::new(dt, ng)),
63            #[cfg(feature = "dtype-u128")]
64            DataType::UInt128 => Box::new(SK::<UInt128Type>::new(dt, ng)),
65            #[cfg(feature = "dtype-i8")]
66            DataType::Int8 => Box::new(SK::<Int8Type>::new(dt, ng)),
67            #[cfg(feature = "dtype-i16")]
68            DataType::Int16 => Box::new(SK::<Int16Type>::new(dt, ng)),
69            DataType::Int32 => Box::new(SK::<Int32Type>::new(dt, ng)),
70            DataType::Int64 => Box::new(SK::<Int64Type>::new(dt, ng)),
71            #[cfg(feature = "dtype-i128")]
72            DataType::Int128 => Box::new(SK::<Int128Type>::new(dt, ng)),
73            #[cfg(feature = "dtype-f16")]
74            DataType::Float16 => Box::new(SK::<Float16Type>::new(dt, ng)),
75            DataType::Float32 => Box::new(SK::<Float32Type>::new(dt, ng)),
76            DataType::Float64 => Box::new(SK::<Float64Type>::new(dt, ng)),
77
78            #[cfg(feature = "dtype-date")]
79            DataType::Date => Box::new(SK::<Int32Type>::new(dt, ng)),
80            #[cfg(feature = "dtype-datetime")]
81            DataType::Datetime(_, _) => Box::new(SK::<Int64Type>::new(dt, ng)),
82            #[cfg(feature = "dtype-duration")]
83            DataType::Duration(_) => Box::new(SK::<Int64Type>::new(dt, ng)),
84            #[cfg(feature = "dtype-time")]
85            DataType::Time => Box::new(SK::<Int64Type>::new(dt, ng)),
86
87            #[cfg(feature = "dtype-decimal")]
88            DataType::Decimal(_, _) => Box::new(SK::<Int128Type>::new(dt, ng)),
89            #[cfg(feature = "dtype-categorical")]
90            dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => {
91                with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
92                    Box::new(SK::<<$C as PolarsCategoricalType>::PolarsPhysical>::new(dt.clone(), ng))
93                })
94            },
95
96            DataType::String | DataType::Binary => {
97                Box::new(binview::BinviewHashHotGrouper::new(ng))
98            },
99
100            _ => Box::new(row_encoded::RowEncodedHashHotGrouper::new(
101                key_schema, num_groups,
102            )),
103        }
104    }
105}