polars_expr/groups/mod.rs
1use std::any::Any;
2use std::path::Path;
3
4use polars_core::prelude::*;
5use polars_utils::cardinality_sketch::CardinalitySketch;
6use polars_utils::hashing::HashPartitioner;
7use polars_utils::IdxSize;
8
9use crate::hash_keys::HashKeys;
10
11mod row_encoded;
12
13/// A Grouper maps keys to groups, such that duplicate keys map to the same group.
14pub trait Grouper: Any + Send + Sync {
15 /// Creates a new empty Grouper similar to this one.
16 fn new_empty(&self) -> Box<dyn Grouper>;
17
18 /// Reserves space for the given number additional groups.
19 fn reserve(&mut self, additional: usize);
20
21 /// Returns the number of groups in this Grouper.
22 fn num_groups(&self) -> IdxSize;
23
24 /// Inserts the given keys into this Grouper, mutating groups_idxs such
25 /// that group_idxs[i] is the group index of keys[..][i].
26 fn insert_keys(&mut self, keys: HashKeys, group_idxs: &mut Vec<IdxSize>);
27
28 /// Adds the given Grouper into this one, mutating groups_idxs such that
29 /// the ith group of other now has group index group_idxs[i] in self.
30 fn combine(&mut self, other: &dyn Grouper, group_idxs: &mut Vec<IdxSize>);
31
32 /// Adds the given Grouper into this one, mutating groups_idxs such that
33 /// the group subset[i] of other now has group index group_idxs[i] in self.
34 ///
35 /// # Safety
36 /// For all i, subset[i] < other.len().
37 unsafe fn gather_combine(
38 &mut self,
39 other: &dyn Grouper,
40 subset: &[IdxSize],
41 group_idxs: &mut Vec<IdxSize>,
42 );
43
44 /// Generate partition indices.
45 ///
46 /// After this function partitions_idxs[i] will contain the indices for
47 /// partition i, and sketches[i] will contain a cardinality sketch for
48 /// partition i.
49 fn gen_partition_idxs(
50 &self,
51 partitioner: &HashPartitioner,
52 partition_idxs: &mut [Vec<IdxSize>],
53 sketches: &mut [CardinalitySketch],
54 );
55
56 /// Returns the keys in this Grouper in group order, that is the key for
57 /// group i is returned in row i.
58 fn get_keys_in_group_order(&self) -> DataFrame;
59
60 /// Stores this Grouper at the given path.
61 fn store_ooc(&self, _path: &Path) {
62 unimplemented!();
63 }
64
65 /// Loads this Grouper from the given path.
66 fn load_ooc(&mut self, _path: &Path) {
67 unimplemented!();
68 }
69
70 fn as_any(&self) -> &dyn Any;
71}
72
73pub fn new_hash_grouper(key_schema: Arc<Schema>) -> Box<dyn Grouper> {
74 Box::new(row_encoded::RowEncodedHashGrouper::new(key_schema))
75}