elasticube_core/cube/
schema.rs

1//! Schema metadata for ElastiCube
2
3use super::{CalculatedMeasure, Dimension, Hierarchy, Measure, VirtualDimension};
4use crate::error::{Error, Result};
5use indexmap::IndexMap;
6use serde::{Deserialize, Serialize};
7
8/// Schema metadata for an ElastiCube
9///
10/// Contains all metadata about dimensions, measures, and hierarchies,
11/// providing a semantic layer over the raw Arrow data.
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct CubeSchema {
14    /// Name of the cube
15    name: String,
16
17    /// Dimensions indexed by name for fast lookup
18    dimensions: IndexMap<String, Dimension>,
19
20    /// Measures indexed by name for fast lookup
21    measures: IndexMap<String, Measure>,
22
23    /// Hierarchies indexed by name for fast lookup
24    hierarchies: IndexMap<String, Hierarchy>,
25
26    /// Calculated measures (derived from expressions)
27    calculated_measures: IndexMap<String, CalculatedMeasure>,
28
29    /// Virtual dimensions (computed dimensions)
30    virtual_dimensions: IndexMap<String, VirtualDimension>,
31
32    /// Optional description
33    description: Option<String>,
34}
35
36impl CubeSchema {
37    /// Create a new cube schema
38    pub fn new(name: impl Into<String>) -> Self {
39        Self {
40            name: name.into(),
41            dimensions: IndexMap::new(),
42            measures: IndexMap::new(),
43            hierarchies: IndexMap::new(),
44            calculated_measures: IndexMap::new(),
45            virtual_dimensions: IndexMap::new(),
46            description: None,
47        }
48    }
49
50    /// Get the cube name
51    pub fn name(&self) -> &str {
52        &self.name
53    }
54
55    /// Get the description
56    pub fn description(&self) -> Option<&str> {
57        self.description.as_deref()
58    }
59
60    /// Set the description
61    pub fn set_description(&mut self, description: impl Into<String>) {
62        self.description = Some(description.into());
63    }
64
65    /// Add a dimension to the schema
66    pub fn add_dimension(&mut self, dimension: Dimension) -> Result<()> {
67        let name = dimension.name().to_string();
68        if self.dimensions.contains_key(&name) {
69            return Err(Error::dimension(format!(
70                "Dimension '{}' already exists",
71                name
72            )));
73        }
74        self.dimensions.insert(name, dimension);
75        Ok(())
76    }
77
78    /// Add a measure to the schema
79    pub fn add_measure(&mut self, measure: Measure) -> Result<()> {
80        // Validate the measure
81        measure.validate().map_err(Error::measure)?;
82
83        let name = measure.name().to_string();
84        if self.measures.contains_key(&name) {
85            return Err(Error::measure(format!("Measure '{}' already exists", name)));
86        }
87        self.measures.insert(name, measure);
88        Ok(())
89    }
90
91    /// Add a hierarchy to the schema
92    pub fn add_hierarchy(&mut self, hierarchy: Hierarchy) -> Result<()> {
93        // Validate the hierarchy
94        hierarchy.validate().map_err(Error::hierarchy)?;
95
96        // Validate that all levels in the hierarchy reference existing dimensions
97        for level in hierarchy.levels() {
98            if !self.dimensions.contains_key(level) {
99                return Err(Error::hierarchy(format!(
100                    "Hierarchy '{}' references non-existent dimension '{}'",
101                    hierarchy.name(),
102                    level
103                )));
104            }
105        }
106
107        let name = hierarchy.name().to_string();
108        if self.hierarchies.contains_key(&name) {
109            return Err(Error::hierarchy(format!(
110                "Hierarchy '{}' already exists",
111                name
112            )));
113        }
114        self.hierarchies.insert(name, hierarchy);
115        Ok(())
116    }
117
118    /// Get all dimensions
119    pub fn dimensions(&self) -> Vec<&Dimension> {
120        self.dimensions.values().collect()
121    }
122
123    /// Get all measures
124    pub fn measures(&self) -> Vec<&Measure> {
125        self.measures.values().collect()
126    }
127
128    /// Get all hierarchies
129    pub fn hierarchies(&self) -> Vec<&Hierarchy> {
130        self.hierarchies.values().collect()
131    }
132
133    /// Get a dimension by name
134    pub fn get_dimension(&self, name: &str) -> Option<&Dimension> {
135        self.dimensions.get(name)
136    }
137
138    /// Get a mutable dimension by name
139    pub fn get_dimension_mut(&mut self, name: &str) -> Option<&mut Dimension> {
140        self.dimensions.get_mut(name)
141    }
142
143    /// Get a measure by name
144    pub fn get_measure(&self, name: &str) -> Option<&Measure> {
145        self.measures.get(name)
146    }
147
148    /// Get a mutable measure by name
149    pub fn get_measure_mut(&mut self, name: &str) -> Option<&mut Measure> {
150        self.measures.get_mut(name)
151    }
152
153    /// Get a hierarchy by name
154    pub fn get_hierarchy(&self, name: &str) -> Option<&Hierarchy> {
155        self.hierarchies.get(name)
156    }
157
158    /// Remove a dimension
159    pub fn remove_dimension(&mut self, name: &str) -> Result<Dimension> {
160        // Check if any hierarchies reference this dimension
161        for hierarchy in self.hierarchies.values() {
162            if hierarchy.contains_level(name) {
163                return Err(Error::dimension(format!(
164                    "Cannot remove dimension '{}': referenced by hierarchy '{}'",
165                    name,
166                    hierarchy.name()
167                )));
168            }
169        }
170
171        self.dimensions
172            .shift_remove(name)
173            .ok_or_else(|| Error::dimension(format!("Dimension '{}' not found", name)))
174    }
175
176    /// Remove a measure
177    pub fn remove_measure(&mut self, name: &str) -> Result<Measure> {
178        self.measures
179            .shift_remove(name)
180            .ok_or_else(|| Error::measure(format!("Measure '{}' not found", name)))
181    }
182
183    /// Remove a hierarchy
184    pub fn remove_hierarchy(&mut self, name: &str) -> Result<Hierarchy> {
185        self.hierarchies
186            .shift_remove(name)
187            .ok_or_else(|| Error::hierarchy(format!("Hierarchy '{}' not found", name)))
188    }
189
190    /// Get the number of dimensions
191    pub fn dimension_count(&self) -> usize {
192        self.dimensions.len()
193    }
194
195    /// Get the number of measures
196    pub fn measure_count(&self) -> usize {
197        self.measures.len()
198    }
199
200    /// Get the number of hierarchies
201    pub fn hierarchy_count(&self) -> usize {
202        self.hierarchies.len()
203    }
204
205    /// Check if a dimension exists
206    pub fn has_dimension(&self, name: &str) -> bool {
207        self.dimensions.contains_key(name)
208    }
209
210    /// Check if a measure exists
211    pub fn has_measure(&self, name: &str) -> bool {
212        self.measures.contains_key(name)
213    }
214
215    /// Check if a hierarchy exists
216    pub fn has_hierarchy(&self, name: &str) -> bool {
217        self.hierarchies.contains_key(name)
218    }
219
220    /// Get all dimension names
221    pub fn dimension_names(&self) -> Vec<&str> {
222        self.dimensions.keys().map(|s| s.as_str()).collect()
223    }
224
225    /// Get all measure names
226    pub fn measure_names(&self) -> Vec<&str> {
227        self.measures.keys().map(|s| s.as_str()).collect()
228    }
229
230    /// Get all hierarchy names
231    pub fn hierarchy_names(&self) -> Vec<&str> {
232        self.hierarchies.keys().map(|s| s.as_str()).collect()
233    }
234
235    /// Add a calculated measure to the schema
236    pub fn add_calculated_measure(&mut self, calc_measure: CalculatedMeasure) -> Result<()> {
237        let name = calc_measure.name().to_string();
238
239        // Check for name conflicts with regular measures and calculated measures
240        if self.measures.contains_key(&name) {
241            return Err(Error::measure(format!(
242                "A measure named '{}' already exists",
243                name
244            )));
245        }
246        if self.calculated_measures.contains_key(&name) {
247            return Err(Error::measure(format!(
248                "Calculated measure '{}' already exists",
249                name
250            )));
251        }
252
253        self.calculated_measures.insert(name, calc_measure);
254        Ok(())
255    }
256
257    /// Add a virtual dimension to the schema
258    pub fn add_virtual_dimension(&mut self, virtual_dim: VirtualDimension) -> Result<()> {
259        let name = virtual_dim.name().to_string();
260
261        // Check for name conflicts with regular dimensions and virtual dimensions
262        if self.dimensions.contains_key(&name) {
263            return Err(Error::dimension(format!(
264                "A dimension named '{}' already exists",
265                name
266            )));
267        }
268        if self.virtual_dimensions.contains_key(&name) {
269            return Err(Error::dimension(format!(
270                "Virtual dimension '{}' already exists",
271                name
272            )));
273        }
274
275        self.virtual_dimensions.insert(name, virtual_dim);
276        Ok(())
277    }
278
279    /// Get all calculated measures
280    pub fn calculated_measures(&self) -> Vec<&CalculatedMeasure> {
281        self.calculated_measures.values().collect()
282    }
283
284    /// Get all virtual dimensions
285    pub fn virtual_dimensions(&self) -> Vec<&VirtualDimension> {
286        self.virtual_dimensions.values().collect()
287    }
288
289    /// Get a calculated measure by name
290    pub fn get_calculated_measure(&self, name: &str) -> Option<&CalculatedMeasure> {
291        self.calculated_measures.get(name)
292    }
293
294    /// Get a virtual dimension by name
295    pub fn get_virtual_dimension(&self, name: &str) -> Option<&VirtualDimension> {
296        self.virtual_dimensions.get(name)
297    }
298
299    /// Remove a calculated measure
300    pub fn remove_calculated_measure(&mut self, name: &str) -> Result<CalculatedMeasure> {
301        self.calculated_measures.shift_remove(name).ok_or_else(|| {
302            Error::measure(format!("Calculated measure '{}' not found", name))
303        })
304    }
305
306    /// Remove a virtual dimension
307    pub fn remove_virtual_dimension(&mut self, name: &str) -> Result<VirtualDimension> {
308        self.virtual_dimensions.shift_remove(name).ok_or_else(|| {
309            Error::dimension(format!("Virtual dimension '{}' not found", name))
310        })
311    }
312
313    /// Check if a calculated measure exists
314    pub fn has_calculated_measure(&self, name: &str) -> bool {
315        self.calculated_measures.contains_key(name)
316    }
317
318    /// Check if a virtual dimension exists
319    pub fn has_virtual_dimension(&self, name: &str) -> bool {
320        self.virtual_dimensions.contains_key(name)
321    }
322
323    /// Get the number of calculated measures
324    pub fn calculated_measure_count(&self) -> usize {
325        self.calculated_measures.len()
326    }
327
328    /// Get the number of virtual dimensions
329    pub fn virtual_dimension_count(&self) -> usize {
330        self.virtual_dimensions.len()
331    }
332
333    /// Convert CubeSchema to Arrow Schema
334    ///
335    /// Creates an Arrow schema containing fields for all dimensions and measures.
336    /// The order is: dimensions first (in insertion order), then measures.
337    pub fn to_arrow_schema(&self) -> arrow::datatypes::Schema {
338        use arrow::datatypes::Field;
339
340        let mut fields = Vec::new();
341
342        // Add dimension fields
343        for dim in self.dimensions.values() {
344            fields.push(Field::new(
345                dim.name(),
346                dim.data_type().clone(),
347                true, // nullable by default
348            ));
349        }
350
351        // Add measure fields
352        for measure in self.measures.values() {
353            fields.push(Field::new(
354                measure.name(),
355                measure.data_type().clone(),
356                true, // nullable by default
357            ));
358        }
359
360        arrow::datatypes::Schema::new(fields)
361    }
362}
363
#[cfg(test)]
mod tests {
    use super::*;
    use super::{CalculatedMeasure, VirtualDimension};
    use crate::cube::{AggFunc, Dimension, Hierarchy, Measure};
    use arrow::datatypes::DataType;

    /// Registers one `Int32` dimension per name, panicking on any failure.
    fn add_int_dimensions(schema: &mut CubeSchema, names: &[&'static str]) {
        for &name in names {
            schema
                .add_dimension(Dimension::new(name, DataType::Int32))
                .unwrap();
        }
    }

    /// Builds a `Float64` measure aggregated with `Sum`.
    fn sum_measure(name: &'static str) -> Measure {
        Measure::new(name, DataType::Float64, AggFunc::Sum)
    }

    /// Builds a `Float64` calculated measure, panicking if construction fails.
    fn float_calc(
        name: &'static str,
        expression: &'static str,
        agg: AggFunc,
    ) -> CalculatedMeasure {
        CalculatedMeasure::new(name, expression, DataType::Float64, agg).unwrap()
    }

    /// Turns a list of level names into the owned form `Hierarchy::new` takes.
    fn levels(names: &[&str]) -> Vec<String> {
        names.iter().map(|level| level.to_string()).collect()
    }

    #[test]
    fn test_schema_creation() {
        let cube = CubeSchema::new("sales_cube");

        assert_eq!(cube.name(), "sales_cube");
        assert_eq!(cube.dimension_count(), 0);
        assert_eq!(cube.measure_count(), 0);
    }

    #[test]
    fn test_add_dimension() {
        let mut cube = CubeSchema::new("test");

        let first = cube.add_dimension(Dimension::new("region", DataType::Utf8));
        assert!(first.is_ok());
        assert_eq!(cube.dimension_count(), 1);
        assert!(cube.has_dimension("region"));

        // A second dimension with the same name must be rejected.
        let duplicate = cube.add_dimension(Dimension::new("region", DataType::Utf8));
        assert!(duplicate.is_err());
    }

    #[test]
    fn test_add_measure() {
        let mut cube = CubeSchema::new("test");

        let added = cube.add_measure(sum_measure("sales"));
        assert!(added.is_ok());
        assert_eq!(cube.measure_count(), 1);
        assert!(cube.has_measure("sales"));
    }

    #[test]
    fn test_add_hierarchy() {
        let mut cube = CubeSchema::new("test");

        // The levels must exist as dimensions before the hierarchy is added.
        add_int_dimensions(&mut cube, &["year", "quarter", "month"]);

        let time = Hierarchy::new("time", levels(&["year", "quarter", "month"]));

        assert!(cube.add_hierarchy(time).is_ok());
        assert_eq!(cube.hierarchy_count(), 1);
        assert!(cube.has_hierarchy("time"));
    }

    #[test]
    fn test_hierarchy_validation() {
        let mut cube = CubeSchema::new("test");

        // No dimensions registered, so both levels are dangling references.
        let time = Hierarchy::new("time", levels(&["year", "month"]));

        assert!(cube.add_hierarchy(time).is_err());
    }

    #[test]
    fn test_remove_dimension_with_hierarchy() {
        let mut cube = CubeSchema::new("test");
        add_int_dimensions(&mut cube, &["year", "month"]);

        cube.add_hierarchy(Hierarchy::new("time", levels(&["year", "month"])))
            .unwrap();

        // Blocked while the hierarchy still references the dimension.
        assert!(cube.remove_dimension("year").is_err());

        // Dropping the hierarchy lifts the restriction.
        cube.remove_hierarchy("time").unwrap();
        assert!(cube.remove_dimension("year").is_ok());
    }

    #[test]
    fn test_add_calculated_measure() {
        let mut cube = CubeSchema::new("test");

        // Base measures the expression refers to.
        cube.add_measure(sum_measure("revenue")).unwrap();
        cube.add_measure(sum_measure("cost")).unwrap();

        let profit = float_calc("profit", "revenue - cost", AggFunc::Sum);
        assert!(cube.add_calculated_measure(profit).is_ok());
        assert_eq!(cube.calculated_measure_count(), 1);
        assert!(cube.has_calculated_measure("profit"));

        // Same name again must be rejected.
        let profit_again = float_calc("profit", "revenue - cost", AggFunc::Sum);
        assert!(cube.add_calculated_measure(profit_again).is_err());
    }

    #[test]
    fn test_add_virtual_dimension() {
        let mut cube = CubeSchema::new("test");

        // Base dimension the expression refers to.
        cube.add_dimension(Dimension::new("sale_date", DataType::Date32))
            .unwrap();

        let year =
            VirtualDimension::new("year", "EXTRACT(YEAR FROM sale_date)", DataType::Int32)
                .unwrap();
        assert!(cube.add_virtual_dimension(year).is_ok());
        assert_eq!(cube.virtual_dimension_count(), 1);
        assert!(cube.has_virtual_dimension("year"));

        // Same name again must be rejected.
        let year_again =
            VirtualDimension::new("year", "EXTRACT(YEAR FROM sale_date)", DataType::Int32)
                .unwrap();
        assert!(cube.add_virtual_dimension(year_again).is_err());
    }

    #[test]
    fn test_calculated_measure_name_conflict() {
        let mut cube = CubeSchema::new("test");
        cube.add_measure(sum_measure("sales")).unwrap();

        // A calculated measure may not reuse a regular measure's name.
        let clashing = float_calc("sales", "revenue * 0.8", AggFunc::Sum);
        assert!(cube.add_calculated_measure(clashing).is_err());
    }

    #[test]
    fn test_virtual_dimension_name_conflict() {
        let mut cube = CubeSchema::new("test");
        cube.add_dimension(Dimension::new("region", DataType::Utf8))
            .unwrap();

        // A virtual dimension may not reuse a regular dimension's name.
        let clashing = VirtualDimension::new("region", "UPPER(region)", DataType::Utf8).unwrap();
        assert!(cube.add_virtual_dimension(clashing).is_err());
    }

    #[test]
    fn test_get_calculated_measure() {
        let mut cube = CubeSchema::new("test");
        cube.add_calculated_measure(float_calc("margin", "profit / revenue", AggFunc::Avg))
            .unwrap();

        let found = cube.get_calculated_measure("margin").unwrap();
        assert_eq!(found.name(), "margin");
        assert_eq!(found.expression(), "profit / revenue");
    }

    #[test]
    fn test_remove_calculated_measure() {
        let mut cube = CubeSchema::new("test");
        cube.add_calculated_measure(float_calc("test", "a + b", AggFunc::Sum))
            .unwrap();

        assert!(cube.remove_calculated_measure("test").is_ok());
        assert_eq!(cube.calculated_measure_count(), 0);

        // Nothing left to remove the second time.
        assert!(cube.remove_calculated_measure("test").is_err());
    }
}