elasticube_core/cube/
calculated.rs

1//! Calculated measures and virtual dimensions
2//!
3//! Support for derived fields using DataFusion SQL expressions.
4
5use arrow::datatypes::DataType;
6use serde::{Deserialize, Serialize};
7
8use super::measure::AggFunc;
9use crate::error::{Error, Result};
10
11/// A calculated measure derived from an expression
12///
13/// Calculated measures are derived from other measures or dimensions using
14/// SQL-like expressions. They're computed at query time using DataFusion's
15/// expression engine.
16///
17/// # Examples
18///
19/// ```rust,ignore
20/// // profit = revenue - cost
21/// let profit = CalculatedMeasure::new(
22///     "profit",
23///     "revenue - cost",
24///     DataType::Float64,
25///     AggFunc::Sum
26/// )?;
27///
28/// // margin = (profit / revenue) * 100
29/// let margin = CalculatedMeasure::new(
30///     "margin",
31///     "(profit / revenue) * 100",
32///     DataType::Float64,
33///     AggFunc::Avg
34/// )?;
35/// ```
36#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
37pub struct CalculatedMeasure {
38    /// Name of the calculated measure
39    name: String,
40
41    /// SQL expression to compute this measure
42    /// Can reference other measures and dimensions
43    expression: String,
44
45    /// Expected data type of the result
46    data_type: DataType,
47
48    /// Default aggregation function
49    default_agg: AggFunc,
50
51    /// Whether the result can be null
52    nullable: bool,
53
54    /// User-provided description
55    description: Option<String>,
56
57    /// Format string for display
58    format: Option<String>,
59}
60
61impl CalculatedMeasure {
62    /// Create a new calculated measure
63    ///
64    /// # Arguments
65    /// * `name` - Name for the calculated measure
66    /// * `expression` - SQL expression (e.g., "revenue - cost")
67    /// * `data_type` - Expected result data type
68    /// * `default_agg` - Default aggregation function
69    ///
70    /// # Returns
71    /// A new CalculatedMeasure instance
72    pub fn new(
73        name: impl Into<String>,
74        expression: impl Into<String>,
75        data_type: DataType,
76        default_agg: AggFunc,
77    ) -> Result<Self> {
78        let name = name.into();
79        let expression = expression.into();
80
81        // Basic validation
82        if name.is_empty() {
83            return Err(Error::Schema("Calculated measure name cannot be empty".into()));
84        }
85        if expression.is_empty() {
86            return Err(Error::Schema("Expression cannot be empty".into()));
87        }
88
89        // Validate aggregation is compatible with data type
90        if !default_agg.is_compatible_with(&data_type) {
91            return Err(Error::Schema(format!(
92                "Aggregation function {} is not compatible with data type {:?}",
93                default_agg, data_type
94            )));
95        }
96
97        Ok(Self {
98            name,
99            expression,
100            data_type,
101            default_agg,
102            nullable: true,
103            description: None,
104            format: None,
105        })
106    }
107
108    /// Get the measure name
109    pub fn name(&self) -> &str {
110        &self.name
111    }
112
113    /// Get the SQL expression
114    pub fn expression(&self) -> &str {
115        &self.expression
116    }
117
118    /// Get the data type
119    pub fn data_type(&self) -> &DataType {
120        &self.data_type
121    }
122
123    /// Get the default aggregation function
124    pub fn default_agg(&self) -> AggFunc {
125        self.default_agg
126    }
127
128    /// Check if the measure is nullable
129    pub fn is_nullable(&self) -> bool {
130        self.nullable
131    }
132
133    /// Get the description
134    pub fn description(&self) -> Option<&str> {
135        self.description.as_deref()
136    }
137
138    /// Get the format string
139    pub fn format(&self) -> Option<&str> {
140        self.format.as_deref()
141    }
142
143    /// Builder-style: set nullable
144    pub fn with_nullable(mut self, nullable: bool) -> Self {
145        self.nullable = nullable;
146        self
147    }
148
149    /// Builder-style: set description
150    pub fn with_description(mut self, description: impl Into<String>) -> Self {
151        self.description = Some(description.into());
152        self
153    }
154
155    /// Builder-style: set format
156    pub fn with_format(mut self, format: impl Into<String>) -> Self {
157        self.format = Some(format.into());
158        self
159    }
160}
161
162/// A virtual dimension computed from an expression
163///
164/// Virtual dimensions are derived from other dimensions or measures using
165/// SQL-like expressions. Common use cases include date part extraction,
166/// categorization, and transformations.
167///
168/// # Examples
169///
170/// ```rust,ignore
171/// // Extract year from date
172/// let year = VirtualDimension::new(
173///     "year",
174///     "EXTRACT(YEAR FROM sale_date)",
175///     DataType::Int32
176/// )?;
177///
178/// // Categorize ages
179/// let age_group = VirtualDimension::new(
180///     "age_group",
181///     "CASE WHEN age < 18 THEN 'Minor' WHEN age < 65 THEN 'Adult' ELSE 'Senior' END",
182///     DataType::Utf8
183/// )?;
184/// ```
185#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
186pub struct VirtualDimension {
187    /// Name of the virtual dimension
188    name: String,
189
190    /// SQL expression to compute this dimension
191    expression: String,
192
193    /// Expected data type of the result
194    data_type: DataType,
195
196    /// Whether the result can be null
197    nullable: bool,
198
199    /// Estimated cardinality (number of unique values)
200    cardinality: Option<usize>,
201
202    /// User-provided description
203    description: Option<String>,
204}
205
206impl VirtualDimension {
207    /// Create a new virtual dimension
208    ///
209    /// # Arguments
210    /// * `name` - Name for the virtual dimension
211    /// * `expression` - SQL expression (e.g., "EXTRACT(YEAR FROM date)")
212    /// * `data_type` - Expected result data type
213    ///
214    /// # Returns
215    /// A new VirtualDimension instance
216    pub fn new(
217        name: impl Into<String>,
218        expression: impl Into<String>,
219        data_type: DataType,
220    ) -> Result<Self> {
221        let name = name.into();
222        let expression = expression.into();
223
224        // Basic validation
225        if name.is_empty() {
226            return Err(Error::Schema("Virtual dimension name cannot be empty".into()));
227        }
228        if expression.is_empty() {
229            return Err(Error::Schema("Expression cannot be empty".into()));
230        }
231
232        Ok(Self {
233            name,
234            expression,
235            data_type,
236            nullable: true,
237            cardinality: None,
238            description: None,
239        })
240    }
241
242    /// Get the dimension name
243    pub fn name(&self) -> &str {
244        &self.name
245    }
246
247    /// Get the SQL expression
248    pub fn expression(&self) -> &str {
249        &self.expression
250    }
251
252    /// Get the data type
253    pub fn data_type(&self) -> &DataType {
254        &self.data_type
255    }
256
257    /// Check if the dimension is nullable
258    pub fn is_nullable(&self) -> bool {
259        self.nullable
260    }
261
262    /// Get the cardinality
263    pub fn cardinality(&self) -> Option<usize> {
264        self.cardinality
265    }
266
267    /// Get the description
268    pub fn description(&self) -> Option<&str> {
269        self.description.as_deref()
270    }
271
272    /// Builder-style: set nullable
273    pub fn with_nullable(mut self, nullable: bool) -> Self {
274        self.nullable = nullable;
275        self
276    }
277
278    /// Builder-style: set cardinality
279    pub fn with_cardinality(mut self, cardinality: usize) -> Self {
280        self.cardinality = Some(cardinality);
281        self
282    }
283
284    /// Builder-style: set description
285    pub fn with_description(mut self, description: impl Into<String>) -> Self {
286        self.description = Some(description.into());
287        self
288    }
289}
290
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built measure echoes its inputs and is nullable by default.
    #[test]
    fn test_calculated_measure_creation() {
        let built =
            CalculatedMeasure::new("profit", "revenue - cost", DataType::Float64, AggFunc::Sum);
        let profit = built.unwrap();

        assert_eq!(profit.name(), "profit");
        assert_eq!(profit.expression(), "revenue - cost");
        assert_eq!(profit.data_type(), &DataType::Float64);
        assert_eq!(profit.default_agg(), AggFunc::Sum);
        assert!(profit.is_nullable());
    }

    /// Each invalid combination of constructor arguments is rejected.
    #[test]
    fn test_calculated_measure_validation() {
        // Empty name should fail
        assert!(CalculatedMeasure::new("", "a + b", DataType::Float64, AggFunc::Sum).is_err());

        // Empty expression should fail
        assert!(CalculatedMeasure::new("test", "", DataType::Float64, AggFunc::Sum).is_err());

        // Incompatible aggregation should fail
        assert!(CalculatedMeasure::new("test", "a || b", DataType::Utf8, AggFunc::Sum).is_err());
    }

    /// Builder calls override the defaults set by the constructor.
    #[test]
    fn test_calculated_measure_builder() {
        let base = CalculatedMeasure::new(
            "margin",
            "profit / revenue * 100",
            DataType::Float64,
            AggFunc::Avg,
        )
        .unwrap();

        let margin = base
            .with_nullable(false)
            .with_description("Profit margin percentage")
            .with_format(",.2f%");

        assert_eq!(margin.name(), "margin");
        assert!(!margin.is_nullable());
        assert_eq!(margin.description(), Some("Profit margin percentage"));
        assert_eq!(margin.format(), Some(",.2f%"));
    }

    /// A freshly built dimension echoes its inputs and is nullable by default.
    #[test]
    fn test_virtual_dimension_creation() {
        let built = VirtualDimension::new("year", "EXTRACT(YEAR FROM sale_date)", DataType::Int32);
        let year = built.unwrap();

        assert_eq!(year.name(), "year");
        assert_eq!(year.expression(), "EXTRACT(YEAR FROM sale_date)");
        assert_eq!(year.data_type(), &DataType::Int32);
        assert!(year.is_nullable());
    }

    /// Empty name and empty expression are both rejected.
    #[test]
    fn test_virtual_dimension_validation() {
        // Empty name should fail
        assert!(VirtualDimension::new("", "EXTRACT(YEAR FROM date)", DataType::Int32).is_err());

        // Empty expression should fail
        assert!(VirtualDimension::new("year", "", DataType::Int32).is_err());
    }

    /// Builder calls override the defaults set by the constructor.
    #[test]
    fn test_virtual_dimension_builder() {
        let base = VirtualDimension::new(
            "age_group",
            "CASE WHEN age < 18 THEN 'Minor' ELSE 'Adult' END",
            DataType::Utf8,
        )
        .unwrap();

        let age_group = base
            .with_nullable(false)
            .with_cardinality(2)
            .with_description("Age category");

        assert_eq!(age_group.name(), "age_group");
        assert!(!age_group.is_nullable());
        assert_eq!(age_group.cardinality(), Some(2));
        assert_eq!(age_group.description(), Some("Age category"));
    }
}