Skip to main content

dbx_core/storage/gpu/
adaptive.rs

1//! Adaptive algorithm selection for GPU operations
2
3use super::strategy::GpuHashStrategy;
4
5/// Adaptive algorithm selection strategy
6#[derive(Debug, Clone, Copy, PartialEq, Eq)]
7pub enum GpuGroupByStrategy {
8    /// Use hash-based GROUP BY (good for low-medium cardinality)
9    Hash(GpuHashStrategy),
10    /// Use radix sort-based GROUP BY (good for high cardinality)
11    RadixSort,
12}
13
14impl GpuGroupByStrategy {
15    /// Choose optimal GROUP BY strategy based on estimated cardinality
16    ///
17    /// Heuristics:
18    /// - < 100K groups: Use hash-based (Linear/Cuckoo/RobinHood)
19    /// - >= 100K groups: Use radix sort
20    pub fn choose_for_cardinality(estimated_groups: usize) -> Self {
21        const RADIX_SORT_THRESHOLD: usize = 100_000;
22
23        if estimated_groups >= RADIX_SORT_THRESHOLD {
24            GpuGroupByStrategy::RadixSort
25        } else {
26            // For hash-based, choose strategy based on group count
27            let chosen_hash = if estimated_groups < 100 {
28                GpuHashStrategy::Linear
29            } else if estimated_groups < 10_000 {
30                GpuHashStrategy::Cuckoo
31            } else {
32                GpuHashStrategy::RobinHood
33            };
34            GpuGroupByStrategy::Hash(chosen_hash)
35        }
36    }
37
38    /// Choose optimal strategy with manual override
39    pub fn choose_with_override(estimated_groups: usize, force_radix: bool) -> Self {
40        if force_radix {
41            GpuGroupByStrategy::RadixSort
42        } else {
43            Self::choose_for_cardinality(estimated_groups)
44        }
45    }
46}
47
48/// Estimate cardinality (number of unique groups) using HyperLogLog-inspired sampling
49/// This is a simplified version for quick estimation
50#[cfg(test)]
51pub fn estimate_cardinality_i32(keys: &[i32]) -> usize {
52    use std::collections::HashSet;
53
54    let n = keys.len();
55
56    // For small datasets, just count unique values
57    if n <= 10_000 {
58        let unique: HashSet<i32> = keys.iter().copied().collect();
59        return unique.len();
60    }
61
62    // For large datasets, sample and extrapolate
63    // Sample 10% or max 100K elements
64    let sample_size = (n / 10).min(100_000);
65    let step = n / sample_size;
66
67    let mut sample_unique = HashSet::new();
68    for i in (0..n).step_by(step) {
69        sample_unique.insert(keys[i]);
70    }
71
72    // Extrapolate: unique_in_sample / sample_size * total_size
73    // Add 20% buffer for estimation error
74    let estimated = (sample_unique.len() * n / sample_size) * 12 / 10;
75    estimated.min(n) // Can't have more groups than elements
76}
77
78#[cfg(test)]
79mod tests {
80    use super::*;
81
82    #[test]
83    fn test_cardinality_estimation_exact() {
84        // All unique
85        let keys: Vec<i32> = (0..1000).collect();
86        let est = estimate_cardinality_i32(&keys);
87        assert!((900..=1100).contains(&est), "Estimated: {}", est);
88
89        // All same
90        let keys = vec![42; 1000];
91        let est = estimate_cardinality_i32(&keys);
92        assert!(est <= 10, "Estimated: {}", est);
93    }
94
95    #[test]
96    fn test_strategy_selection() {
97        // Low cardinality -> Linear hash
98        let strategy = GpuGroupByStrategy::choose_for_cardinality(50);
99        assert_eq!(strategy, GpuGroupByStrategy::Hash(GpuHashStrategy::Linear));
100
101        // Medium cardinality -> Cuckoo hash
102        let strategy = GpuGroupByStrategy::choose_for_cardinality(500);
103        assert_eq!(strategy, GpuGroupByStrategy::Hash(GpuHashStrategy::Cuckoo));
104
105        // High cardinality (< 100K) -> RobinHood hash
106        let strategy = GpuGroupByStrategy::choose_for_cardinality(50_000);
107        assert_eq!(
108            strategy,
109            GpuGroupByStrategy::Hash(GpuHashStrategy::RobinHood)
110        );
111
112        // Very high cardinality -> Radix sort
113        let strategy = GpuGroupByStrategy::choose_for_cardinality(200_000);
114        assert_eq!(strategy, GpuGroupByStrategy::RadixSort);
115    }
116}