polars_expr/groups/
mod.rs

1use std::any::Any;
2use std::path::Path;
3
4use polars_core::prelude::*;
5use polars_utils::cardinality_sketch::CardinalitySketch;
6use polars_utils::hashing::HashPartitioner;
7use polars_utils::IdxSize;
8
9use crate::hash_keys::HashKeys;
10
11mod row_encoded;
12
13/// A Grouper maps keys to groups, such that duplicate keys map to the same group.
14pub trait Grouper: Any + Send + Sync {
15    /// Creates a new empty Grouper similar to this one.
16    fn new_empty(&self) -> Box<dyn Grouper>;
17
18    /// Reserves space for the given number additional groups.
19    fn reserve(&mut self, additional: usize);
20
21    /// Returns the number of groups in this Grouper.
22    fn num_groups(&self) -> IdxSize;
23
24    /// Inserts the given keys into this Grouper, mutating groups_idxs such
25    /// that group_idxs[i] is the group index of keys[..][i].
26    fn insert_keys(&mut self, keys: HashKeys, group_idxs: &mut Vec<IdxSize>);
27
28    /// Adds the given Grouper into this one, mutating groups_idxs such that
29    /// the ith group of other now has group index group_idxs[i] in self.
30    fn combine(&mut self, other: &dyn Grouper, group_idxs: &mut Vec<IdxSize>);
31
32    /// Adds the given Grouper into this one, mutating groups_idxs such that
33    /// the group subset[i] of other now has group index group_idxs[i] in self.
34    ///
35    /// # Safety
36    /// For all i, subset[i] < other.len().
37    unsafe fn gather_combine(
38        &mut self,
39        other: &dyn Grouper,
40        subset: &[IdxSize],
41        group_idxs: &mut Vec<IdxSize>,
42    );
43
44    /// Generate partition indices.
45    ///
46    /// After this function partitions_idxs[i] will contain the indices for
47    /// partition i, and sketches[i] will contain a cardinality sketch for
48    /// partition i.
49    fn gen_partition_idxs(
50        &self,
51        partitioner: &HashPartitioner,
52        partition_idxs: &mut [Vec<IdxSize>],
53        sketches: &mut [CardinalitySketch],
54    );
55
56    /// Returns the keys in this Grouper in group order, that is the key for
57    /// group i is returned in row i.
58    fn get_keys_in_group_order(&self) -> DataFrame;
59
60    /// Stores this Grouper at the given path.
61    fn store_ooc(&self, _path: &Path) {
62        unimplemented!();
63    }
64
65    /// Loads this Grouper from the given path.
66    fn load_ooc(&mut self, _path: &Path) {
67        unimplemented!();
68    }
69
70    fn as_any(&self) -> &dyn Any;
71}
72
73pub fn new_hash_grouper(key_schema: Arc<Schema>) -> Box<dyn Grouper> {
74    Box::new(row_encoded::RowEncodedHashGrouper::new(key_schema))
75}