polars_expr/groups/
mod.rs1use std::any::Any;
2
3use arrow::bitmap::BitmapBuilder;
4use polars_core::prelude::*;
5#[cfg(feature = "dtype-categorical")]
6use polars_core::with_match_categorical_physical_type;
7use polars_core::with_match_physical_numeric_polars_type;
8use polars_utils::IdxSize;
9use polars_utils::hashing::HashPartitioner;
10
11use crate::hash_keys::HashKeys;
12
13mod binview;
14mod row_encoded;
15mod single_key;
16
17pub trait Grouper: Any + Send + Sync {
19 fn new_empty(&self) -> Box<dyn Grouper>;
21
22 fn reserve(&mut self, additional: usize);
24
25 fn num_groups(&self) -> IdxSize;
27
28 unsafe fn insert_keys_subset(
34 &mut self,
35 keys: &HashKeys,
36 subset: &[IdxSize],
37 group_idxs: Option<&mut Vec<IdxSize>>,
38 );
39
40 fn get_keys_in_group_order(&self, schema: &Schema) -> DataFrame;
43
44 unsafe fn probe_partitioned_groupers(
49 &self,
50 groupers: &[Box<dyn Grouper>],
51 keys: &HashKeys,
52 partitioner: &HashPartitioner,
53 invert: bool,
54 probe_matches: &mut Vec<IdxSize>,
55 );
56
57 unsafe fn contains_key_partitioned_groupers(
62 &self,
63 groupers: &[Box<dyn Grouper>],
64 keys: &HashKeys,
65 partitioner: &HashPartitioner,
66 invert: bool,
67 contains_key: &mut BitmapBuilder,
68 );
69
70 fn as_any(&self) -> &dyn Any;
71}
72
73pub fn new_hash_grouper(key_schema: Arc<Schema>) -> Box<dyn Grouper> {
74 if key_schema.len() > 1 {
75 Box::new(row_encoded::RowEncodedHashGrouper::new())
76 } else {
77 let (_name, dt) = key_schema.get_at_index(0).unwrap();
78 match dt {
79 dt if dt.is_primitive_numeric() | dt.is_temporal() => {
80 with_match_physical_numeric_polars_type!(dt.to_physical(), |$T| {
81 Box::new(single_key::SingleKeyHashGrouper::<$T>::new())
82 })
83 },
84
85 #[cfg(feature = "dtype-decimal")]
86 DataType::Decimal(_, _) => {
87 Box::new(single_key::SingleKeyHashGrouper::<Int128Type>::new())
88 },
89 #[cfg(feature = "dtype-categorical")]
90 dt @ (DataType::Enum(_, _) | DataType::Categorical(_, _)) => {
91 with_match_categorical_physical_type!(dt.cat_physical().unwrap(), |$C| {
92 Box::new(single_key::SingleKeyHashGrouper::<<$C as PolarsCategoricalType>::PolarsPhysical>::new())
93 })
94 },
95
96 DataType::String | DataType::Binary => Box::new(binview::BinviewHashGrouper::new()),
97
98 _ => Box::new(row_encoded::RowEncodedHashGrouper::new()),
99 }
100 }
101}