Skip to main content

grafeo_core/execution/operators/
accumulator.rs

1//! Shared accumulator types for both pull-based and push-based aggregate operators.
2//!
3//! Provides the canonical definitions of [`AggregateFunction`], [`AggregateExpr`],
4//! and [`HashableValue`] used by both `aggregate.rs` (pull) and `push/aggregate.rs`.
5
6use grafeo_common::types::Value;
7
/// The kinds of aggregation an aggregate operator can compute.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AggregateFunction {
    /// `COUNT(*)`: counts rows.
    Count,
    /// `COUNT(column)`: counts only the non-null values.
    CountNonNull,
    /// Sum of the values.
    Sum,
    /// Average of the values.
    Avg,
    /// Smallest value.
    Min,
    /// Largest value.
    Max,
    /// First value in the group.
    First,
    /// Last value in the group.
    Last,
    /// Gathers the values into a list.
    Collect,
    /// `STDEV`: sample standard deviation.
    StdDev,
    /// `STDEVP`: population standard deviation.
    StdDevPop,
    /// `PERCENTILE_DISC`: discrete percentile.
    PercentileDisc,
    /// `PERCENTILE_CONT`: continuous percentile.
    PercentileCont,
}
38
39/// An aggregation expression.
40#[derive(Debug, Clone)]
41pub struct AggregateExpr {
42    /// The aggregation function.
43    pub function: AggregateFunction,
44    /// Column index to aggregate (None for COUNT(*)).
45    pub column: Option<usize>,
46    /// Whether to aggregate distinct values only.
47    pub distinct: bool,
48    /// Output alias (for naming the result column).
49    pub alias: Option<String>,
50    /// Percentile parameter for PERCENTILE_DISC/PERCENTILE_CONT (0.0 to 1.0).
51    pub percentile: Option<f64>,
52}
53
54impl AggregateExpr {
55    /// Creates a COUNT(*) expression.
56    pub fn count_star() -> Self {
57        Self {
58            function: AggregateFunction::Count,
59            column: None,
60            distinct: false,
61            alias: None,
62            percentile: None,
63        }
64    }
65
66    /// Creates a COUNT(column) expression.
67    pub fn count(column: usize) -> Self {
68        Self {
69            function: AggregateFunction::CountNonNull,
70            column: Some(column),
71            distinct: false,
72            alias: None,
73            percentile: None,
74        }
75    }
76
77    /// Creates a SUM(column) expression.
78    pub fn sum(column: usize) -> Self {
79        Self {
80            function: AggregateFunction::Sum,
81            column: Some(column),
82            distinct: false,
83            alias: None,
84            percentile: None,
85        }
86    }
87
88    /// Creates an AVG(column) expression.
89    pub fn avg(column: usize) -> Self {
90        Self {
91            function: AggregateFunction::Avg,
92            column: Some(column),
93            distinct: false,
94            alias: None,
95            percentile: None,
96        }
97    }
98
99    /// Creates a MIN(column) expression.
100    pub fn min(column: usize) -> Self {
101        Self {
102            function: AggregateFunction::Min,
103            column: Some(column),
104            distinct: false,
105            alias: None,
106            percentile: None,
107        }
108    }
109
110    /// Creates a MAX(column) expression.
111    pub fn max(column: usize) -> Self {
112        Self {
113            function: AggregateFunction::Max,
114            column: Some(column),
115            distinct: false,
116            alias: None,
117            percentile: None,
118        }
119    }
120
121    /// Creates a FIRST(column) expression.
122    pub fn first(column: usize) -> Self {
123        Self {
124            function: AggregateFunction::First,
125            column: Some(column),
126            distinct: false,
127            alias: None,
128            percentile: None,
129        }
130    }
131
132    /// Creates a LAST(column) expression.
133    pub fn last(column: usize) -> Self {
134        Self {
135            function: AggregateFunction::Last,
136            column: Some(column),
137            distinct: false,
138            alias: None,
139            percentile: None,
140        }
141    }
142
143    /// Creates a COLLECT(column) expression.
144    pub fn collect(column: usize) -> Self {
145        Self {
146            function: AggregateFunction::Collect,
147            column: Some(column),
148            distinct: false,
149            alias: None,
150            percentile: None,
151        }
152    }
153
154    /// Creates a STDEV(column) expression (sample standard deviation).
155    pub fn stdev(column: usize) -> Self {
156        Self {
157            function: AggregateFunction::StdDev,
158            column: Some(column),
159            distinct: false,
160            alias: None,
161            percentile: None,
162        }
163    }
164
165    /// Creates a STDEVP(column) expression (population standard deviation).
166    pub fn stdev_pop(column: usize) -> Self {
167        Self {
168            function: AggregateFunction::StdDevPop,
169            column: Some(column),
170            distinct: false,
171            alias: None,
172            percentile: None,
173        }
174    }
175
176    /// Creates a PERCENTILE_DISC(column, percentile) expression.
177    ///
178    /// # Arguments
179    /// * `column` - Column index to aggregate
180    /// * `percentile` - Percentile value between 0.0 and 1.0 (e.g., 0.5 for median)
181    pub fn percentile_disc(column: usize, percentile: f64) -> Self {
182        Self {
183            function: AggregateFunction::PercentileDisc,
184            column: Some(column),
185            distinct: false,
186            alias: None,
187            percentile: Some(percentile.clamp(0.0, 1.0)),
188        }
189    }
190
191    /// Creates a PERCENTILE_CONT(column, percentile) expression.
192    ///
193    /// # Arguments
194    /// * `column` - Column index to aggregate
195    /// * `percentile` - Percentile value between 0.0 and 1.0 (e.g., 0.5 for median)
196    pub fn percentile_cont(column: usize, percentile: f64) -> Self {
197        Self {
198            function: AggregateFunction::PercentileCont,
199            column: Some(column),
200            distinct: false,
201            alias: None,
202            percentile: Some(percentile.clamp(0.0, 1.0)),
203        }
204    }
205
206    /// Sets the distinct flag.
207    pub fn with_distinct(mut self) -> Self {
208        self.distinct = true;
209        self
210    }
211
212    /// Sets the output alias.
213    pub fn with_alias(mut self, alias: impl Into<String>) -> Self {
214        self.alias = Some(alias.into());
215        self
216    }
217}
218
/// Hashable stand-in for a [`Value`], used for DISTINCT tracking.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum HashableValue {
    /// The null value.
    Null,
    /// A boolean.
    Bool(bool),
    /// A 64-bit signed integer.
    Int64(i64),
    /// A float held as its bit pattern so that hashing is deterministic.
    Float64Bits(u64),
    /// A string.
    String(String),
    /// Any other value type, keyed by its `Debug` representation.
    Other(String),
}
235
236impl From<&Value> for HashableValue {
237    fn from(v: &Value) -> Self {
238        match v {
239            Value::Null => HashableValue::Null,
240            Value::Bool(b) => HashableValue::Bool(*b),
241            Value::Int64(i) => HashableValue::Int64(*i),
242            Value::Float64(f) => HashableValue::Float64Bits(f.to_bits()),
243            Value::String(s) => HashableValue::String(s.to_string()),
244            other => HashableValue::Other(format!("{other:?}")),
245        }
246    }
247}
248
249impl From<Value> for HashableValue {
250    fn from(v: Value) -> Self {
251        Self::from(&v)
252    }
253}