elasticube_core/
builder.rs

1//! ElastiCube builder for constructing cubes
2
3use crate::cube::{
4    AggFunc, CalculatedMeasure, CubeSchema, Dimension, ElastiCube, Hierarchy, Measure,
5    VirtualDimension,
6};
7use crate::error::{Error, Result};
8use crate::sources::{CsvSource, DataSource, JsonSource, ParquetSource, RecordBatchSource};
9use arrow::datatypes::{DataType, Schema as ArrowSchema};
10use arrow::record_batch::RecordBatch;
11use std::sync::Arc;
12
/// Builder for constructing an ElastiCube
///
/// Provides a fluent API for defining dimensions, measures, hierarchies,
/// and loading data from various sources.
///
/// Every configuration method consumes the builder and hands it back, so calls
/// can be chained; `build` ends the chain and produces the cube.
#[derive(Debug)]
pub struct ElastiCubeBuilder {
    /// Cube definition accumulated by the `add_*` and `with_description` calls.
    schema: CubeSchema,
    /// Source that `build` loads from; `None` until a loader method sets it.
    data_source: Option<Box<dyn DataSource>>,
}
22
23impl ElastiCubeBuilder {
24    /// Create a new builder
25    pub fn new(name: impl Into<String>) -> Self {
26        Self {
27            schema: CubeSchema::new(name),
28            data_source: None,
29        }
30    }
31
32    /// Add a dimension
33    pub fn add_dimension(
34        mut self,
35        name: impl Into<String>,
36        data_type: DataType,
37    ) -> Result<Self> {
38        let dimension = Dimension::new(name, data_type);
39        self.schema.add_dimension(dimension)?;
40        Ok(self)
41    }
42
43    /// Add a measure
44    pub fn add_measure(
45        mut self,
46        name: impl Into<String>,
47        data_type: DataType,
48        agg_func: AggFunc,
49    ) -> Result<Self> {
50        let measure = Measure::new(name, data_type, agg_func);
51        self.schema.add_measure(measure)?;
52        Ok(self)
53    }
54
55    /// Add a hierarchy
56    pub fn add_hierarchy(
57        mut self,
58        name: impl Into<String>,
59        levels: Vec<String>,
60    ) -> Result<Self> {
61        let hierarchy = Hierarchy::new(name, levels);
62        self.schema.add_hierarchy(hierarchy)?;
63        Ok(self)
64    }
65
66    /// Add a calculated measure (derived from an expression)
67    ///
68    /// # Arguments
69    /// * `name` - Name for the calculated measure
70    /// * `expression` - SQL expression (e.g., "revenue - cost")
71    /// * `data_type` - Expected result data type
72    /// * `agg_func` - Default aggregation function
73    ///
74    /// # Example
75    /// ```rust,ignore
76    /// let cube = ElastiCubeBuilder::new("sales")
77    ///     .add_measure("revenue", DataType::Float64, AggFunc::Sum)?
78    ///     .add_measure("cost", DataType::Float64, AggFunc::Sum)?
79    ///     .add_calculated_measure(
80    ///         "profit",
81    ///         "revenue - cost",
82    ///         DataType::Float64,
83    ///         AggFunc::Sum
84    ///     )?
85    ///     .build()?;
86    /// ```
87    pub fn add_calculated_measure(
88        mut self,
89        name: impl Into<String>,
90        expression: impl Into<String>,
91        data_type: DataType,
92        agg_func: AggFunc,
93    ) -> Result<Self> {
94        let calc_measure = CalculatedMeasure::new(name, expression, data_type, agg_func)?;
95        self.schema.add_calculated_measure(calc_measure)?;
96        Ok(self)
97    }
98
99    /// Add a virtual dimension (computed dimension)
100    ///
101    /// # Arguments
102    /// * `name` - Name for the virtual dimension
103    /// * `expression` - SQL expression (e.g., "EXTRACT(YEAR FROM date)")
104    /// * `data_type` - Expected result data type
105    ///
106    /// # Example
107    /// ```rust,ignore
108    /// let cube = ElastiCubeBuilder::new("sales")
109    ///     .add_dimension("sale_date", DataType::Date32)?
110    ///     .add_virtual_dimension(
111    ///         "year",
112    ///         "EXTRACT(YEAR FROM sale_date)",
113    ///         DataType::Int32
114    ///     )?
115    ///     .build()?;
116    /// ```
117    pub fn add_virtual_dimension(
118        mut self,
119        name: impl Into<String>,
120        expression: impl Into<String>,
121        data_type: DataType,
122    ) -> Result<Self> {
123        let virtual_dim = VirtualDimension::new(name, expression, data_type)?;
124        self.schema.add_virtual_dimension(virtual_dim)?;
125        Ok(self)
126    }
127
128    /// Set the cube description
129    pub fn with_description(mut self, description: impl Into<String>) -> Self {
130        self.schema.set_description(description);
131        self
132    }
133
134    /// Load data from a CSV file
135    ///
136    /// # Arguments
137    /// * `path` - Path to the CSV file
138    ///
139    /// # Example
140    /// ```rust,ignore
141    /// let cube = ElastiCubeBuilder::new("sales")
142    ///     .load_csv("data.csv")?
143    ///     .build()?;
144    /// ```
145    pub fn load_csv(mut self, path: impl Into<String>) -> Self {
146        let source = CsvSource::new(path);
147        self.data_source = Some(Box::new(source));
148        self
149    }
150
151    /// Load data from a CSV file with custom configuration
152    ///
153    /// # Arguments
154    /// * `source` - Configured CsvSource
155    ///
156    /// # Example
157    /// ```rust,ignore
158    /// let source = CsvSource::new("data.csv")
159    ///     .with_delimiter(b';')
160    ///     .with_batch_size(4096);
161    /// let cube = ElastiCubeBuilder::new("sales")
162    ///     .load_csv_with(source)
163    ///     .build()?;
164    /// ```
165    pub fn load_csv_with(mut self, source: CsvSource) -> Self {
166        self.data_source = Some(Box::new(source));
167        self
168    }
169
170    /// Load data from a Parquet file
171    ///
172    /// # Arguments
173    /// * `path` - Path to the Parquet file
174    pub fn load_parquet(mut self, path: impl Into<String>) -> Self {
175        let source = ParquetSource::new(path);
176        self.data_source = Some(Box::new(source));
177        self
178    }
179
180    /// Load data from a Parquet file with custom configuration
181    pub fn load_parquet_with(mut self, source: ParquetSource) -> Self {
182        self.data_source = Some(Box::new(source));
183        self
184    }
185
186    /// Load data from a JSON file
187    ///
188    /// # Arguments
189    /// * `path` - Path to the JSON file
190    pub fn load_json(mut self, path: impl Into<String>) -> Self {
191        let source = JsonSource::new(path);
192        self.data_source = Some(Box::new(source));
193        self
194    }
195
196    /// Load data from a JSON file with custom configuration
197    pub fn load_json_with(mut self, source: JsonSource) -> Self {
198        self.data_source = Some(Box::new(source));
199        self
200    }
201
202    /// Load data from Arrow RecordBatches
203    ///
204    /// # Arguments
205    /// * `schema` - Arrow schema for the batches
206    /// * `batches` - Vector of RecordBatches containing the data
207    pub fn load_record_batches(
208        mut self,
209        schema: Arc<ArrowSchema>,
210        batches: Vec<RecordBatch>,
211    ) -> Result<Self> {
212        let source = RecordBatchSource::new(schema, batches)?;
213        self.data_source = Some(Box::new(source));
214        Ok(self)
215    }
216
217    /// Load data from RecordBatches (convenience method for testing)
218    ///
219    /// Infers schema from the first batch. All batches must have the same schema.
220    ///
221    /// # Arguments
222    /// * `batches` - Vector of RecordBatches containing the data
223    ///
224    /// # Example
225    /// ```rust,ignore
226    /// let batch = RecordBatch::try_new(schema, columns)?;
227    /// let cube = ElastiCubeBuilder::new("test")
228    ///     .with_data(vec![batch])?
229    ///     .build()?;
230    /// ```
231    pub fn with_data(mut self, batches: Vec<RecordBatch>) -> Result<Self> {
232        if batches.is_empty() {
233            return Err(Error::builder("Cannot load empty batch vector"));
234        }
235
236        let schema = batches[0].schema();
237        let source = RecordBatchSource::new(schema, batches)?;
238        self.data_source = Some(Box::new(source));
239        Ok(self)
240    }
241
242    // ==============================================================================
243    // Database Sources (available with "database" feature)
244    // ==============================================================================
245
246    /// Load data from PostgreSQL database
247    ///
248    /// Requires the "database" feature to be enabled.
249    ///
250    /// # Arguments
251    /// * `host` - Database host (e.g., "localhost")
252    /// * `database` - Database name
253    /// * `username` - Username for authentication
254    /// * `password` - Password for authentication
255    /// * `query` - SQL query to execute
256    ///
257    /// # Example
258    /// ```rust,ignore
259    /// let cube = ElastiCubeBuilder::new("sales")
260    ///     .load_postgres("localhost", "mydb", "user", "pass", "SELECT * FROM sales")?
261    ///     .build()?;
262    /// ```
263    #[cfg(feature = "database")]
264    pub fn load_postgres(
265        mut self,
266        host: impl Into<String>,
267        database: impl Into<String>,
268        username: impl Into<String>,
269        password: impl Into<String>,
270        query: impl Into<String>,
271    ) -> Self {
272        use crate::sources::database::PostgresSource;
273        let source = PostgresSource::new(host, database, username, password)
274            .with_query(query);
275        self.data_source = Some(Box::new(source));
276        self
277    }
278
279    /// Load data from PostgreSQL with custom configuration
280    ///
281    /// Requires the "database" feature to be enabled.
282    ///
283    /// # Example
284    /// ```rust,ignore
285    /// let source = PostgresSource::new("localhost", "mydb", "user", "pass")
286    ///     .with_port(5433)
287    ///     .with_query("SELECT * FROM sales WHERE year = 2024")
288    ///     .with_batch_size(4096);
289    ///
290    /// let cube = ElastiCubeBuilder::new("sales")
291    ///     .load_postgres_with(source)
292    ///     .build()?;
293    /// ```
294    #[cfg(feature = "database")]
295    pub fn load_postgres_with(mut self, source: crate::sources::database::PostgresSource) -> Self {
296        self.data_source = Some(Box::new(source));
297        self
298    }
299
300    /// Load data from MySQL database
301    ///
302    /// Requires the "database" feature to be enabled.
303    ///
304    /// # Arguments
305    /// * `host` - Database host (e.g., "localhost")
306    /// * `database` - Database name
307    /// * `username` - Username for authentication
308    /// * `password` - Password for authentication
309    /// * `query` - SQL query to execute
310    ///
311    /// # Example
312    /// ```rust,ignore
313    /// let cube = ElastiCubeBuilder::new("orders")
314    ///     .load_mysql("localhost", "mydb", "user", "pass", "SELECT * FROM orders")?
315    ///     .build()?;
316    /// ```
317    #[cfg(feature = "database")]
318    pub fn load_mysql(
319        mut self,
320        host: impl Into<String>,
321        database: impl Into<String>,
322        username: impl Into<String>,
323        password: impl Into<String>,
324        query: impl Into<String>,
325    ) -> Self {
326        use crate::sources::database::MySqlSource;
327        let source = MySqlSource::new(host, database, username, password)
328            .with_query(query);
329        self.data_source = Some(Box::new(source));
330        self
331    }
332
333    /// Load data from MySQL with custom configuration
334    ///
335    /// Requires the "database" feature to be enabled.
336    #[cfg(feature = "database")]
337    pub fn load_mysql_with(mut self, source: crate::sources::database::MySqlSource) -> Self {
338        self.data_source = Some(Box::new(source));
339        self
340    }
341
342    /// Load data via generic ODBC connection
343    ///
344    /// Supports any ODBC-compatible database (PostgreSQL, MySQL, SQL Server, SQLite, etc.).
345    /// Requires the "database" feature to be enabled.
346    ///
347    /// # Arguments
348    /// * `connection_string` - ODBC connection string
349    /// * `query` - SQL query to execute
350    ///
351    /// # Example Connection Strings
352    ///
353    /// **PostgreSQL**:
354    /// ```text
355    /// Driver={PostgreSQL Unicode};Server=localhost;Port=5432;Database=mydb;Uid=user;Pwd=pass;
356    /// ```
357    ///
358    /// **SQL Server**:
359    /// ```text
360    /// Driver={ODBC Driver 17 for SQL Server};Server=localhost;Database=mydb;Uid=user;Pwd=pass;
361    /// ```
362    ///
363    /// # Example
364    /// ```rust,ignore
365    /// let cube = ElastiCubeBuilder::new("analytics")
366    ///     .load_odbc(
367    ///         "Driver={PostgreSQL Unicode};Server=localhost;Database=analytics;Uid=admin;Pwd=secret;",
368    ///         "SELECT * FROM metrics WHERE date >= '2024-01-01'"
369    ///     )
370    ///     .build()?;
371    /// ```
372    #[cfg(feature = "database")]
373    pub fn load_odbc(
374        mut self,
375        connection_string: impl Into<String>,
376        query: impl Into<String>,
377    ) -> Self {
378        use crate::sources::database::OdbcSource;
379        let source = OdbcSource::new(connection_string, query);
380        self.data_source = Some(Box::new(source));
381        self
382    }
383
384    /// Load data via ODBC with custom configuration
385    ///
386    /// Requires the "database" feature to be enabled.
387    #[cfg(feature = "database")]
388    pub fn load_odbc_with(mut self, source: crate::sources::database::OdbcSource) -> Self {
389        self.data_source = Some(Box::new(source));
390        self
391    }
392
393    // ==============================================================================
394    // REST API Sources (available with "rest-api" feature)
395    // ==============================================================================
396
397    /// Load data from a REST API endpoint
398    ///
399    /// Requires the "rest-api" feature to be enabled.
400    /// The API must return JSON data (either an array of objects or a single object).
401    ///
402    /// # Arguments
403    /// * `url` - API endpoint URL
404    ///
405    /// # Example
406    /// ```rust,ignore
407    /// let cube = ElastiCubeBuilder::new("api_data")
408    ///     .load_rest_api("https://api.example.com/sales")
409    ///     .build()?;
410    /// ```
411    #[cfg(feature = "rest-api")]
412    pub fn load_rest_api(mut self, url: impl Into<String>) -> Self {
413        use crate::sources::rest::RestApiSource;
414        let source = RestApiSource::new(url);
415        self.data_source = Some(Box::new(source));
416        self
417    }
418
419    /// Load data from a REST API with custom configuration
420    ///
421    /// Requires the "rest-api" feature to be enabled.
422    ///
423    /// # Example
424    /// ```rust,ignore
425    /// let source = RestApiSource::new("https://api.example.com/data")
426    ///     .with_method(HttpMethod::Post)
427    ///     .with_header("Authorization", "Bearer token123")
428    ///     .with_query_param("limit", "1000")
429    ///     .with_timeout_secs(60);
430    ///
431    /// let cube = ElastiCubeBuilder::new("api_data")
432    ///     .load_rest_api_with(source)
433    ///     .build()?;
434    /// ```
435    #[cfg(feature = "rest-api")]
436    pub fn load_rest_api_with(mut self, source: crate::sources::rest::RestApiSource) -> Self {
437        self.data_source = Some(Box::new(source));
438        self
439    }
440
441    // ==============================================================================
442    // Object Storage Sources (available with "object-storage" feature)
443    // ==============================================================================
444
445    /// Load data from AWS S3
446    ///
447    /// Requires the "object-storage" feature to be enabled.
448    ///
449    /// # Arguments
450    /// * `bucket` - S3 bucket name
451    /// * `path` - Path to the file in the bucket (e.g., "data/sales.parquet")
452    ///
453    /// # Example
454    /// ```rust,ignore
455    /// // Uses AWS credentials from environment or ~/.aws/credentials
456    /// let cube = ElastiCubeBuilder::new("sales")
457    ///     .load_s3("my-bucket", "data/sales.parquet")
458    ///     .build()?;
459    /// ```
460    #[cfg(feature = "object-storage")]
461    pub fn load_s3(
462        mut self,
463        bucket: impl Into<String>,
464        path: impl Into<String>,
465    ) -> Self {
466        use crate::sources::object_storage::S3Source;
467        let source = S3Source::new(bucket, path);
468        self.data_source = Some(Box::new(source));
469        self
470    }
471
472    /// Load data from AWS S3 with custom configuration
473    ///
474    /// Requires the "object-storage" feature to be enabled.
475    ///
476    /// # Example
477    /// ```rust,ignore
478    /// use elasticube_core::{S3Source, StorageFileFormat};
479    ///
480    /// let source = S3Source::new("my-bucket", "data/sales.csv")
481    ///     .with_region("us-west-2")
482    ///     .with_access_key("AKIAIOSFODNN7EXAMPLE", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
483    ///     .with_format(StorageFileFormat::Csv)
484    ///     .with_batch_size(4096);
485    ///
486    /// let cube = ElastiCubeBuilder::new("sales")
487    ///     .load_s3_with(source)
488    ///     .build()?;
489    /// ```
490    #[cfg(feature = "object-storage")]
491    pub fn load_s3_with(mut self, source: crate::sources::object_storage::S3Source) -> Self {
492        self.data_source = Some(Box::new(source));
493        self
494    }
495
496    /// Load data from Google Cloud Storage (GCS)
497    ///
498    /// Requires the "object-storage" feature to be enabled.
499    ///
500    /// # Arguments
501    /// * `bucket` - GCS bucket name
502    /// * `path` - Path to the file in the bucket
503    ///
504    /// # Example
505    /// ```rust,ignore
506    /// // Uses Google Cloud credentials from GOOGLE_APPLICATION_CREDENTIALS env var
507    /// let cube = ElastiCubeBuilder::new("analytics")
508    ///     .load_gcs("my-gcs-bucket", "data/analytics.parquet")
509    ///     .build()?;
510    /// ```
511    #[cfg(feature = "object-storage")]
512    pub fn load_gcs(
513        mut self,
514        bucket: impl Into<String>,
515        path: impl Into<String>,
516    ) -> Self {
517        use crate::sources::object_storage::GcsSource;
518        let source = GcsSource::new(bucket, path);
519        self.data_source = Some(Box::new(source));
520        self
521    }
522
523    /// Load data from Google Cloud Storage with custom configuration
524    ///
525    /// Requires the "object-storage" feature to be enabled.
526    ///
527    /// # Example
528    /// ```rust,ignore
529    /// use elasticube_core::{GcsSource, StorageFileFormat};
530    ///
531    /// let source = GcsSource::new("my-bucket", "data/metrics.json")
532    ///     .with_service_account_key("/path/to/key.json")
533    ///     .with_format(StorageFileFormat::Json)
534    ///     .with_batch_size(8192);
535    ///
536    /// let cube = ElastiCubeBuilder::new("metrics")
537    ///     .load_gcs_with(source)
538    ///     .build()?;
539    /// ```
540    #[cfg(feature = "object-storage")]
541    pub fn load_gcs_with(mut self, source: crate::sources::object_storage::GcsSource) -> Self {
542        self.data_source = Some(Box::new(source));
543        self
544    }
545
546    /// Load data from Azure Blob Storage
547    ///
548    /// Requires the "object-storage" feature to be enabled.
549    ///
550    /// # Arguments
551    /// * `account` - Azure storage account name
552    /// * `container` - Container name
553    /// * `path` - Path to the file in the container
554    ///
555    /// # Example
556    /// ```rust,ignore
557    /// let cube = ElastiCubeBuilder::new("reports")
558    ///     .load_azure("mystorageaccount", "mycontainer", "data/reports.parquet")
559    ///     .build()?;
560    /// ```
561    #[cfg(feature = "object-storage")]
562    pub fn load_azure(
563        mut self,
564        account: impl Into<String>,
565        container: impl Into<String>,
566        path: impl Into<String>,
567    ) -> Self {
568        use crate::sources::object_storage::AzureSource;
569        let source = AzureSource::new(account, container, path);
570        self.data_source = Some(Box::new(source));
571        self
572    }
573
574    /// Load data from Azure Blob Storage with custom configuration
575    ///
576    /// Requires the "object-storage" feature to be enabled.
577    ///
578    /// # Example
579    /// ```rust,ignore
580    /// use elasticube_core::{AzureSource, StorageFileFormat};
581    ///
582    /// let source = AzureSource::new("mystorageaccount", "mycontainer", "data/logs.csv")
583    ///     .with_access_key("your-access-key")
584    ///     .with_format(StorageFileFormat::Csv)
585    ///     .with_batch_size(4096);
586    ///
587    /// let cube = ElastiCubeBuilder::new("logs")
588    ///     .load_azure_with(source)
589    ///     .build()?;
590    /// ```
591    #[cfg(feature = "object-storage")]
592    pub fn load_azure_with(mut self, source: crate::sources::object_storage::AzureSource) -> Self {
593        self.data_source = Some(Box::new(source));
594        self
595    }
596
597    /// Build the cube
598    ///
599    /// Loads data from the configured source and creates an ElastiCube.
600    /// If dimensions and measures were explicitly defined, validates that the
601    /// data schema matches. Otherwise, infers the schema from the data.
602    pub fn build(mut self) -> Result<ElastiCube> {
603        // Ensure we have a data source
604        let data_source = self.data_source.take().ok_or_else(|| {
605            Error::builder("No data source specified. Use load_csv, load_parquet, load_json, or load_record_batches")
606        })?;
607
608        // Load data from the source
609        let (loaded_schema, batches) = data_source.load()?;
610
611        // Determine the final Arrow schema
612        let arrow_schema = if self.schema.dimension_count() > 0 || self.schema.measure_count() > 0 {
613            // User has explicitly defined dimensions/measures
614            // Convert our CubeSchema to ArrowSchema and validate against loaded data
615            let expected_schema = Arc::new(self.schema.to_arrow_schema());
616
617            // Validate that the loaded schema is compatible
618            validate_schema_compatibility(&expected_schema, &loaded_schema)?;
619
620            // Use the loaded schema to avoid mismatch errors with RecordBatch schemas
621            // The validation ensures compatibility between expected and loaded schemas
622            loaded_schema
623        } else {
624            // No explicit schema defined - infer from loaded data
625            // We'll treat all columns as dimensions for now
626            // Users can explicitly specify measures if they want aggregations
627            for field in loaded_schema.fields() {
628                let dimension = Dimension::new(field.name(), field.data_type().clone());
629                self.schema.add_dimension(dimension)?;
630            }
631
632            loaded_schema
633        };
634
635        // Create the ElastiCube
636        ElastiCube::new(self.schema, arrow_schema, batches)
637    }
638}
639
640/// Validate that a loaded schema is compatible with the expected schema
641///
642/// Checks that all expected fields exist in the loaded schema with compatible types
643fn validate_schema_compatibility(
644    expected: &ArrowSchema,
645    loaded: &ArrowSchema,
646) -> Result<()> {
647    for expected_field in expected.fields() {
648        let loaded_field = loaded.field_with_name(expected_field.name()).map_err(|_| {
649            Error::schema(format!(
650                "Field '{}' not found in loaded data",
651                expected_field.name()
652            ))
653        })?;
654
655        // Check if data types match
656        if expected_field.data_type() != loaded_field.data_type() {
657            return Err(Error::schema(format!(
658                "Field '{}' has incompatible type: expected {:?}, found {:?}",
659                expected_field.name(),
660                expected_field.data_type(),
661                loaded_field.data_type()
662            )));
663        }
664    }
665
666    Ok(())
667}
668
669#[cfg(test)]
670mod tests {
671    use super::*;
672    use arrow::array::{Float64Array, Int32Array, StringArray};
673    use arrow::datatypes::Field;
674    use std::sync::Arc;
675
676    #[test]
677    fn test_builder_creation() {
678        let builder = ElastiCubeBuilder::new("test_cube");
679        assert_eq!(builder.schema.name(), "test_cube");
680    }
681
682    #[test]
683    fn test_builder_add_dimension() {
684        let builder = ElastiCubeBuilder::new("test")
685            .add_dimension("region", DataType::Utf8)
686            .unwrap();
687        assert!(builder.schema.has_dimension("region"));
688    }
689
690    #[test]
691    fn test_builder_add_measure() {
692        let builder = ElastiCubeBuilder::new("test")
693            .add_measure("sales", DataType::Float64, AggFunc::Sum)
694            .unwrap();
695        assert!(builder.schema.has_measure("sales"));
696    }
697
698    #[test]
699    fn test_build_without_data_source() {
700        let builder = ElastiCubeBuilder::new("test")
701            .add_dimension("region", DataType::Utf8)
702            .unwrap();
703
704        let result = builder.build();
705        assert!(result.is_err());
706        assert!(result
707            .unwrap_err()
708            .to_string()
709            .contains("No data source specified"));
710    }
711
712    #[test]
713    fn test_build_with_record_batches() {
714        // Create a simple schema
715        let schema = Arc::new(ArrowSchema::new(vec![
716            Field::new("id", DataType::Int32, false),
717            Field::new("value", DataType::Float64, false),
718        ]));
719
720        // Create some data
721        let batch = RecordBatch::try_new(
722            schema.clone(),
723            vec![
724                Arc::new(Int32Array::from(vec![1, 2, 3])),
725                Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])),
726            ],
727        )
728        .unwrap();
729
730        // Build the cube
731        let cube = ElastiCubeBuilder::new("test")
732            .load_record_batches(schema, vec![batch])
733            .unwrap()
734            .build()
735            .unwrap();
736
737        assert_eq!(cube.row_count(), 3);
738        assert_eq!(cube.dimensions().len(), 2); // Both fields treated as dimensions
739    }
740
741    #[test]
742    fn test_build_with_explicit_schema() {
743        // Create a schema
744        let schema = Arc::new(ArrowSchema::new(vec![
745            Field::new("region", DataType::Utf8, false),
746            Field::new("sales", DataType::Float64, false),
747        ]));
748
749        // Create some data
750        let batch = RecordBatch::try_new(
751            schema.clone(),
752            vec![
753                Arc::new(StringArray::from(vec!["North", "South", "East"])),
754                Arc::new(Float64Array::from(vec![100.0, 200.0, 150.0])),
755            ],
756        )
757        .unwrap();
758
759        // Build the cube with explicit dimensions and measures
760        let cube = ElastiCubeBuilder::new("sales_cube")
761            .add_dimension("region", DataType::Utf8)
762            .unwrap()
763            .add_measure("sales", DataType::Float64, AggFunc::Sum)
764            .unwrap()
765            .load_record_batches(schema, vec![batch])
766            .unwrap()
767            .build()
768            .unwrap();
769
770        assert_eq!(cube.row_count(), 3);
771        assert_eq!(cube.dimensions().len(), 1);
772        assert_eq!(cube.measures().len(), 1);
773    }
774
775    #[test]
776    fn test_schema_validation_failure() {
777        // Create a schema with wrong field names
778        let schema = Arc::new(ArrowSchema::new(vec![
779            Field::new("wrong_name", DataType::Utf8, false),
780            Field::new("sales", DataType::Float64, false),
781        ]));
782
783        let batch = RecordBatch::try_new(
784            schema.clone(),
785            vec![
786                Arc::new(StringArray::from(vec!["North"])),
787                Arc::new(Float64Array::from(vec![100.0])),
788            ],
789        )
790        .unwrap();
791
792        // This should fail because "region" is not in the loaded schema
793        let result = ElastiCubeBuilder::new("test")
794            .add_dimension("region", DataType::Utf8)
795            .unwrap()
796            .add_measure("sales", DataType::Float64, AggFunc::Sum)
797            .unwrap()
798            .load_record_batches(schema, vec![batch])
799            .unwrap()
800            .build();
801
802        assert!(result.is_err());
803        assert!(result.unwrap_err().to_string().contains("not found"));
804    }
805}