// dbx_core/storage/gpu/strategy.rs

//! GPU hash and reduction strategies

use crate::error::{DbxError, DbxResult};

/// GPU hash strategy for GROUP BY operations.
///
/// The default strategy is [`GpuHashStrategy::Linear`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum GpuHashStrategy {
    /// Linear probing - best for small group counts (< 100)
    #[default]
    Linear,
    /// Cuckoo hashing - best for medium group counts (100-1000)
    Cuckoo,
    /// Robin Hood hashing - best for large group counts (> 1000) or large datasets
    RobinHood,
}

17impl GpuHashStrategy {
18    /// Parse from string (case-insensitive)
19    pub fn parse(s: &str) -> DbxResult<Self> {
20        match s.to_lowercase().as_str() {
21            "linear" => Ok(GpuHashStrategy::Linear),
22            "cuckoo" => Ok(GpuHashStrategy::Cuckoo),
23            "robin_hood" | "robinhood" => Ok(GpuHashStrategy::RobinHood),
24            _ => Err(DbxError::Gpu(format!(
25                "Invalid GPU hash strategy: '{}'. Valid options: linear, cuckoo, robin_hood",
26                s
27            ))),
28        }
29    }
30
31    /// Get strategy name
32    pub fn as_str(&self) -> &'static str {
33        match self {
34            GpuHashStrategy::Linear => "linear",
35            GpuHashStrategy::Cuckoo => "cuckoo",
36            GpuHashStrategy::RobinHood => "robin_hood",
37        }
38    }
39}
40
/// Reduction strategy for SUM/COUNT operations.
///
/// The default strategy is [`GpuReductionStrategy::Auto`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum GpuReductionStrategy {
    /// Automatically choose based on data size
    #[default]
    Auto,
    /// Single-pass with atomic operations (good for small data)
    SinglePass,
    /// Multi-pass reduction (good for large data, eliminates atomic contention)
    MultiPass,
    /// Histogram-based aggregation (best for low cardinality < 1000)
    Histogram,
}

55impl GpuReductionStrategy {
56    /// Parse from string (case-insensitive)
57    pub fn parse(s: &str) -> DbxResult<Self> {
58        match s.to_lowercase().as_str() {
59            "auto" => Ok(GpuReductionStrategy::Auto),
60            "single" | "single_pass" => Ok(GpuReductionStrategy::SinglePass),
61            "multi" | "multi_pass" => Ok(GpuReductionStrategy::MultiPass),
62            "histogram" => Ok(GpuReductionStrategy::Histogram),
63            _ => Err(DbxError::Gpu(format!(
64                "Invalid GPU reduction strategy: '{}'. Valid options: auto, single_pass, multi_pass, histogram",
65                s
66            ))),
67        }
68    }
69
70    /// Get strategy name
71    pub fn as_str(&self) -> &'static str {
72        match self {
73            GpuReductionStrategy::Auto => "auto",
74            GpuReductionStrategy::SinglePass => "single_pass",
75            GpuReductionStrategy::MultiPass => "multi_pass",
76            GpuReductionStrategy::Histogram => "histogram",
77        }
78    }
79
80    /// Choose optimal strategy based on data size
81    /// For SUM: single-pass is generally better for current GPU architecture
82    /// Multi-pass only beneficial for extremely large datasets (>100M rows)
83    pub fn choose_for_sum(&self, data_size: usize) -> GpuReductionStrategy {
84        match self {
85            GpuReductionStrategy::Auto => {
86                // Based on benchmarks: single-pass is better for most cases
87                // Only use multi-pass for very large datasets
88                if data_size > 100_000_000 {
89                    GpuReductionStrategy::MultiPass
90                } else {
91                    GpuReductionStrategy::SinglePass
92                }
93            }
94            strategy => *strategy,
95        }
96    }
97}