elasticube_core/builder.rs
//! ElastiCube builder for constructing cubes

use crate::cube::{
    AggFunc, CalculatedMeasure, CubeSchema, Dimension, ElastiCube, Hierarchy, Measure,
    VirtualDimension,
};
use crate::error::{Error, Result};
use crate::sources::{CsvSource, DataSource, JsonSource, ParquetSource, RecordBatchSource};
use arrow::datatypes::{DataType, Schema as ArrowSchema};
use arrow::record_batch::RecordBatch;
use std::sync::Arc;

/// Builder for constructing an ElastiCube
///
/// Provides a fluent API for defining dimensions, measures, hierarchies,
/// and loading data from various sources.
#[derive(Debug)]
pub struct ElastiCubeBuilder {
    schema: CubeSchema,
    data_source: Option<Box<dyn DataSource>>,
}

impl ElastiCubeBuilder {
    /// Create a new builder
    pub fn new(name: impl Into<String>) -> Self {
        Self {
            schema: CubeSchema::new(name),
            data_source: None,
        }
    }

    /// Add a dimension
    pub fn add_dimension(
        mut self,
        name: impl Into<String>,
        data_type: DataType,
    ) -> Result<Self> {
        let dimension = Dimension::new(name, data_type);
        self.schema.add_dimension(dimension)?;
        Ok(self)
    }

    /// Add a measure
    pub fn add_measure(
        mut self,
        name: impl Into<String>,
        data_type: DataType,
        agg_func: AggFunc,
    ) -> Result<Self> {
        let measure = Measure::new(name, data_type, agg_func);
        self.schema.add_measure(measure)?;
        Ok(self)
    }

    /// Add a hierarchy
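    ///
    /// # Example
    /// A minimal sketch; the dimension and level names are illustrative:
    /// ```rust,ignore
    /// let cube = ElastiCubeBuilder::new("sales")
    ///     .add_dimension("country", DataType::Utf8)?
    ///     .add_dimension("state", DataType::Utf8)?
    ///     .add_hierarchy("geography", vec!["country".to_string(), "state".to_string()])?
    ///     .build()?;
    /// ```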
    pub fn add_hierarchy(
        mut self,
        name: impl Into<String>,
        levels: Vec<String>,
    ) -> Result<Self> {
        let hierarchy = Hierarchy::new(name, levels);
        self.schema.add_hierarchy(hierarchy)?;
        Ok(self)
    }

    /// Add a calculated measure (derived from an expression)
    ///
    /// # Arguments
    /// * `name` - Name for the calculated measure
    /// * `expression` - SQL expression (e.g., "revenue - cost")
    /// * `data_type` - Expected result data type
    /// * `agg_func` - Default aggregation function
    ///
    /// # Example
    /// ```rust,ignore
    /// let cube = ElastiCubeBuilder::new("sales")
    ///     .add_measure("revenue", DataType::Float64, AggFunc::Sum)?
    ///     .add_measure("cost", DataType::Float64, AggFunc::Sum)?
    ///     .add_calculated_measure(
    ///         "profit",
    ///         "revenue - cost",
    ///         DataType::Float64,
    ///         AggFunc::Sum
    ///     )?
    ///     .build()?;
    /// ```
    pub fn add_calculated_measure(
        mut self,
        name: impl Into<String>,
        expression: impl Into<String>,
        data_type: DataType,
        agg_func: AggFunc,
    ) -> Result<Self> {
        let calc_measure = CalculatedMeasure::new(name, expression, data_type, agg_func)?;
        self.schema.add_calculated_measure(calc_measure)?;
        Ok(self)
    }

    /// Add a virtual dimension (computed dimension)
    ///
    /// # Arguments
    /// * `name` - Name for the virtual dimension
    /// * `expression` - SQL expression (e.g., "EXTRACT(YEAR FROM date)")
    /// * `data_type` - Expected result data type
    ///
    /// # Example
    /// ```rust,ignore
    /// let cube = ElastiCubeBuilder::new("sales")
    ///     .add_dimension("sale_date", DataType::Date32)?
    ///     .add_virtual_dimension(
    ///         "year",
    ///         "EXTRACT(YEAR FROM sale_date)",
    ///         DataType::Int32
    ///     )?
    ///     .build()?;
    /// ```
    pub fn add_virtual_dimension(
        mut self,
        name: impl Into<String>,
        expression: impl Into<String>,
        data_type: DataType,
    ) -> Result<Self> {
        let virtual_dim = VirtualDimension::new(name, expression, data_type)?;
        self.schema.add_virtual_dimension(virtual_dim)?;
        Ok(self)
    }

    /// Set the cube description
    pub fn with_description(mut self, description: impl Into<String>) -> Self {
        self.schema.set_description(description);
        self
    }

    /// Load data from a CSV file
    ///
    /// # Arguments
    /// * `path` - Path to the CSV file
    ///
    /// # Example
    /// ```rust,ignore
    /// let cube = ElastiCubeBuilder::new("sales")
    ///     .load_csv("data.csv")
    ///     .build()?;
    /// ```
    pub fn load_csv(mut self, path: impl Into<String>) -> Self {
        let source = CsvSource::new(path);
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data from a CSV file with custom configuration
    ///
    /// # Arguments
    /// * `source` - Configured CsvSource
    ///
    /// # Example
    /// ```rust,ignore
    /// let source = CsvSource::new("data.csv")
    ///     .with_delimiter(b';')
    ///     .with_batch_size(4096);
    /// let cube = ElastiCubeBuilder::new("sales")
    ///     .load_csv_with(source)
    ///     .build()?;
    /// ```
    pub fn load_csv_with(mut self, source: CsvSource) -> Self {
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data from a Parquet file
    ///
    /// # Arguments
    /// * `path` - Path to the Parquet file
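    ///
    /// # Example
    /// A minimal sketch; the file path is a placeholder:
    /// ```rust,ignore
    /// let cube = ElastiCubeBuilder::new("sales")
    ///     .load_parquet("data/sales.parquet")
    ///     .build()?;
    /// ```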
    pub fn load_parquet(mut self, path: impl Into<String>) -> Self {
        let source = ParquetSource::new(path);
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data from a Parquet file with custom configuration
    pub fn load_parquet_with(mut self, source: ParquetSource) -> Self {
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data from a JSON file
    ///
    /// # Arguments
    /// * `path` - Path to the JSON file
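    ///
    /// # Example
    /// A minimal sketch; the file path is a placeholder:
    /// ```rust,ignore
    /// let cube = ElastiCubeBuilder::new("events")
    ///     .load_json("data/events.json")
    ///     .build()?;
    /// ```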
    pub fn load_json(mut self, path: impl Into<String>) -> Self {
        let source = JsonSource::new(path);
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data from a JSON file with custom configuration
    pub fn load_json_with(mut self, source: JsonSource) -> Self {
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data from Arrow RecordBatches
    ///
    /// # Arguments
    /// * `schema` - Arrow schema for the batches
    /// * `batches` - Vector of RecordBatches containing the data
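    ///
    /// # Example
    /// A minimal sketch with an in-memory batch; the schema, column values, and
    /// arrow array types are illustrative and assumed to be in scope:
    /// ```rust,ignore
    /// let schema = Arc::new(ArrowSchema::new(vec![
    ///     Field::new("id", DataType::Int32, false),
    ///     Field::new("value", DataType::Float64, false),
    /// ]));
    /// let batch = RecordBatch::try_new(
    ///     schema.clone(),
    ///     vec![
    ///         Arc::new(Int32Array::from(vec![1, 2, 3])),
    ///         Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])),
    ///     ],
    /// )?;
    /// let cube = ElastiCubeBuilder::new("test")
    ///     .load_record_batches(schema, vec![batch])?
    ///     .build()?;
    /// ```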
    pub fn load_record_batches(
        mut self,
        schema: Arc<ArrowSchema>,
        batches: Vec<RecordBatch>,
    ) -> Result<Self> {
        let source = RecordBatchSource::new(schema, batches)?;
        self.data_source = Some(Box::new(source));
        Ok(self)
    }

    /// Load data from RecordBatches (convenience method for testing)
    ///
    /// Infers schema from the first batch. All batches must have the same schema.
    ///
    /// # Arguments
    /// * `batches` - Vector of RecordBatches containing the data
    ///
    /// # Example
    /// ```rust,ignore
    /// let batch = RecordBatch::try_new(schema, columns)?;
    /// let cube = ElastiCubeBuilder::new("test")
    ///     .with_data(vec![batch])?
    ///     .build()?;
    /// ```
    pub fn with_data(mut self, batches: Vec<RecordBatch>) -> Result<Self> {
        if batches.is_empty() {
            return Err(Error::builder("Cannot load empty batch vector"));
        }

        let schema = batches[0].schema();
        let source = RecordBatchSource::new(schema, batches)?;
        self.data_source = Some(Box::new(source));
        Ok(self)
    }

    // ==============================================================================
    // Database Sources (available with "database" feature)
    // ==============================================================================

    /// Load data from a PostgreSQL database
    ///
    /// Requires the "database" feature to be enabled.
    ///
    /// # Arguments
    /// * `host` - Database host (e.g., "localhost")
    /// * `database` - Database name
    /// * `username` - Username for authentication
    /// * `password` - Password for authentication
    /// * `query` - SQL query to execute
    ///
    /// # Example
    /// ```rust,ignore
    /// let cube = ElastiCubeBuilder::new("sales")
    ///     .load_postgres("localhost", "mydb", "user", "pass", "SELECT * FROM sales")
    ///     .build()?;
    /// ```
    #[cfg(feature = "database")]
    pub fn load_postgres(
        mut self,
        host: impl Into<String>,
        database: impl Into<String>,
        username: impl Into<String>,
        password: impl Into<String>,
        query: impl Into<String>,
    ) -> Self {
        use crate::sources::database::PostgresSource;
        let source = PostgresSource::new(host, database, username, password)
            .with_query(query);
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data from PostgreSQL with custom configuration
    ///
    /// Requires the "database" feature to be enabled.
    ///
    /// # Example
    /// ```rust,ignore
    /// let source = PostgresSource::new("localhost", "mydb", "user", "pass")
    ///     .with_port(5433)
    ///     .with_query("SELECT * FROM sales WHERE year = 2024")
    ///     .with_batch_size(4096);
    ///
    /// let cube = ElastiCubeBuilder::new("sales")
    ///     .load_postgres_with(source)
    ///     .build()?;
    /// ```
    #[cfg(feature = "database")]
    pub fn load_postgres_with(mut self, source: crate::sources::database::PostgresSource) -> Self {
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data from a MySQL database
    ///
    /// Requires the "database" feature to be enabled.
    ///
    /// # Arguments
    /// * `host` - Database host (e.g., "localhost")
    /// * `database` - Database name
    /// * `username` - Username for authentication
    /// * `password` - Password for authentication
    /// * `query` - SQL query to execute
    ///
    /// # Example
    /// ```rust,ignore
    /// let cube = ElastiCubeBuilder::new("orders")
    ///     .load_mysql("localhost", "mydb", "user", "pass", "SELECT * FROM orders")
    ///     .build()?;
    /// ```
    #[cfg(feature = "database")]
    pub fn load_mysql(
        mut self,
        host: impl Into<String>,
        database: impl Into<String>,
        username: impl Into<String>,
        password: impl Into<String>,
        query: impl Into<String>,
    ) -> Self {
        use crate::sources::database::MySqlSource;
        let source = MySqlSource::new(host, database, username, password)
            .with_query(query);
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data from MySQL with custom configuration
    ///
    /// Requires the "database" feature to be enabled.
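    ///
    /// # Example
    /// A minimal sketch using only the constructor and `with_query` shown above;
    /// the connection values are placeholders:
    /// ```rust,ignore
    /// let source = MySqlSource::new("localhost", "mydb", "user", "pass")
    ///     .with_query("SELECT * FROM orders WHERE year = 2024");
    ///
    /// let cube = ElastiCubeBuilder::new("orders")
    ///     .load_mysql_with(source)
    ///     .build()?;
    /// ```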
    #[cfg(feature = "database")]
    pub fn load_mysql_with(mut self, source: crate::sources::database::MySqlSource) -> Self {
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data via a generic ODBC connection
    ///
    /// Supports any ODBC-compatible database (PostgreSQL, MySQL, SQL Server, SQLite, etc.).
    /// Requires the "database" feature to be enabled.
    ///
    /// # Arguments
    /// * `connection_string` - ODBC connection string
    /// * `query` - SQL query to execute
    ///
    /// # Example Connection Strings
    ///
    /// **PostgreSQL**:
    /// ```text
    /// Driver={PostgreSQL Unicode};Server=localhost;Port=5432;Database=mydb;Uid=user;Pwd=pass;
    /// ```
    ///
    /// **SQL Server**:
    /// ```text
    /// Driver={ODBC Driver 17 for SQL Server};Server=localhost;Database=mydb;Uid=user;Pwd=pass;
    /// ```
    ///
    /// # Example
    /// ```rust,ignore
    /// let cube = ElastiCubeBuilder::new("analytics")
    ///     .load_odbc(
    ///         "Driver={PostgreSQL Unicode};Server=localhost;Database=analytics;Uid=admin;Pwd=secret;",
    ///         "SELECT * FROM metrics WHERE date >= '2024-01-01'"
    ///     )
    ///     .build()?;
    /// ```
    #[cfg(feature = "database")]
    pub fn load_odbc(
        mut self,
        connection_string: impl Into<String>,
        query: impl Into<String>,
    ) -> Self {
        use crate::sources::database::OdbcSource;
        let source = OdbcSource::new(connection_string, query);
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data via ODBC with custom configuration
    ///
    /// Requires the "database" feature to be enabled.
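    ///
    /// # Example
    /// A minimal sketch using only the `OdbcSource::new` constructor shown above;
    /// the connection string is a placeholder:
    /// ```rust,ignore
    /// let source = OdbcSource::new(
    ///     "Driver={PostgreSQL Unicode};Server=localhost;Database=analytics;Uid=admin;Pwd=secret;",
    ///     "SELECT * FROM metrics",
    /// );
    ///
    /// let cube = ElastiCubeBuilder::new("analytics")
    ///     .load_odbc_with(source)
    ///     .build()?;
    /// ```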
    #[cfg(feature = "database")]
    pub fn load_odbc_with(mut self, source: crate::sources::database::OdbcSource) -> Self {
        self.data_source = Some(Box::new(source));
        self
    }

    // ==============================================================================
    // REST API Sources (available with "rest-api" feature)
    // ==============================================================================

    /// Load data from a REST API endpoint
    ///
    /// Requires the "rest-api" feature to be enabled.
    /// The API must return JSON data (either an array of objects or a single object).
    ///
    /// # Arguments
    /// * `url` - API endpoint URL
    ///
    /// # Example
    /// ```rust,ignore
    /// let cube = ElastiCubeBuilder::new("api_data")
    ///     .load_rest_api("https://api.example.com/sales")
    ///     .build()?;
    /// ```
    #[cfg(feature = "rest-api")]
    pub fn load_rest_api(mut self, url: impl Into<String>) -> Self {
        use crate::sources::rest::RestApiSource;
        let source = RestApiSource::new(url);
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data from a REST API with custom configuration
    ///
    /// Requires the "rest-api" feature to be enabled.
    ///
    /// # Example
    /// ```rust,ignore
    /// let source = RestApiSource::new("https://api.example.com/data")
    ///     .with_method(HttpMethod::Post)
    ///     .with_header("Authorization", "Bearer token123")
    ///     .with_query_param("limit", "1000")
    ///     .with_timeout_secs(60);
    ///
    /// let cube = ElastiCubeBuilder::new("api_data")
    ///     .load_rest_api_with(source)
    ///     .build()?;
    /// ```
    #[cfg(feature = "rest-api")]
    pub fn load_rest_api_with(mut self, source: crate::sources::rest::RestApiSource) -> Self {
        self.data_source = Some(Box::new(source));
        self
    }

    // ==============================================================================
    // Object Storage Sources (available with "object-storage" feature)
    // ==============================================================================

    /// Load data from AWS S3
    ///
    /// Requires the "object-storage" feature to be enabled.
    ///
    /// # Arguments
    /// * `bucket` - S3 bucket name
    /// * `path` - Path to the file in the bucket (e.g., "data/sales.parquet")
    ///
    /// # Example
    /// ```rust,ignore
    /// // Uses AWS credentials from environment or ~/.aws/credentials
    /// let cube = ElastiCubeBuilder::new("sales")
    ///     .load_s3("my-bucket", "data/sales.parquet")
    ///     .build()?;
    /// ```
    #[cfg(feature = "object-storage")]
    pub fn load_s3(
        mut self,
        bucket: impl Into<String>,
        path: impl Into<String>,
    ) -> Self {
        use crate::sources::object_storage::S3Source;
        let source = S3Source::new(bucket, path);
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data from AWS S3 with custom configuration
    ///
    /// Requires the "object-storage" feature to be enabled.
    ///
    /// # Example
    /// ```rust,ignore
    /// use elasticube_core::{S3Source, StorageFileFormat};
    ///
    /// let source = S3Source::new("my-bucket", "data/sales.csv")
    ///     .with_region("us-west-2")
    ///     .with_access_key("AKIAIOSFODNN7EXAMPLE", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
    ///     .with_format(StorageFileFormat::Csv)
    ///     .with_batch_size(4096);
    ///
    /// let cube = ElastiCubeBuilder::new("sales")
    ///     .load_s3_with(source)
    ///     .build()?;
    /// ```
    #[cfg(feature = "object-storage")]
    pub fn load_s3_with(mut self, source: crate::sources::object_storage::S3Source) -> Self {
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data from Google Cloud Storage (GCS)
    ///
    /// Requires the "object-storage" feature to be enabled.
    ///
    /// # Arguments
    /// * `bucket` - GCS bucket name
    /// * `path` - Path to the file in the bucket
    ///
    /// # Example
    /// ```rust,ignore
    /// // Uses Google Cloud credentials from GOOGLE_APPLICATION_CREDENTIALS env var
    /// let cube = ElastiCubeBuilder::new("analytics")
    ///     .load_gcs("my-gcs-bucket", "data/analytics.parquet")
    ///     .build()?;
    /// ```
    #[cfg(feature = "object-storage")]
    pub fn load_gcs(
        mut self,
        bucket: impl Into<String>,
        path: impl Into<String>,
    ) -> Self {
        use crate::sources::object_storage::GcsSource;
        let source = GcsSource::new(bucket, path);
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data from Google Cloud Storage with custom configuration
    ///
    /// Requires the "object-storage" feature to be enabled.
    ///
    /// # Example
    /// ```rust,ignore
    /// use elasticube_core::{GcsSource, StorageFileFormat};
    ///
    /// let source = GcsSource::new("my-bucket", "data/metrics.json")
    ///     .with_service_account_key("/path/to/key.json")
    ///     .with_format(StorageFileFormat::Json)
    ///     .with_batch_size(8192);
    ///
    /// let cube = ElastiCubeBuilder::new("metrics")
    ///     .load_gcs_with(source)
    ///     .build()?;
    /// ```
    #[cfg(feature = "object-storage")]
    pub fn load_gcs_with(mut self, source: crate::sources::object_storage::GcsSource) -> Self {
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data from Azure Blob Storage
    ///
    /// Requires the "object-storage" feature to be enabled.
    ///
    /// # Arguments
    /// * `account` - Azure storage account name
    /// * `container` - Container name
    /// * `path` - Path to the file in the container
    ///
    /// # Example
    /// ```rust,ignore
    /// let cube = ElastiCubeBuilder::new("reports")
    ///     .load_azure("mystorageaccount", "mycontainer", "data/reports.parquet")
    ///     .build()?;
    /// ```
    #[cfg(feature = "object-storage")]
    pub fn load_azure(
        mut self,
        account: impl Into<String>,
        container: impl Into<String>,
        path: impl Into<String>,
    ) -> Self {
        use crate::sources::object_storage::AzureSource;
        let source = AzureSource::new(account, container, path);
        self.data_source = Some(Box::new(source));
        self
    }

    /// Load data from Azure Blob Storage with custom configuration
    ///
    /// Requires the "object-storage" feature to be enabled.
    ///
    /// # Example
    /// ```rust,ignore
    /// use elasticube_core::{AzureSource, StorageFileFormat};
    ///
    /// let source = AzureSource::new("mystorageaccount", "mycontainer", "data/logs.csv")
    ///     .with_access_key("your-access-key")
    ///     .with_format(StorageFileFormat::Csv)
    ///     .with_batch_size(4096);
    ///
    /// let cube = ElastiCubeBuilder::new("logs")
    ///     .load_azure_with(source)
    ///     .build()?;
    /// ```
    #[cfg(feature = "object-storage")]
    pub fn load_azure_with(mut self, source: crate::sources::object_storage::AzureSource) -> Self {
        self.data_source = Some(Box::new(source));
        self
    }

    /// Build the cube
    ///
    /// Loads data from the configured source and creates an ElastiCube.
    /// If dimensions and measures were explicitly defined, validates that the
    /// data schema matches. Otherwise, infers the schema from the data.
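    ///
    /// # Example
    /// A minimal sketch of the inference path (no explicit dimensions or measures,
    /// so every column in the loaded data becomes a dimension); the file path is a placeholder:
    /// ```rust,ignore
    /// let cube = ElastiCubeBuilder::new("sales")
    ///     .load_csv("data.csv")
    ///     .build()?;
    /// ```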
    pub fn build(mut self) -> Result<ElastiCube> {
        // Ensure we have a data source
        let data_source = self.data_source.take().ok_or_else(|| {
            Error::builder("No data source specified. Use load_csv, load_parquet, load_json, or load_record_batches")
        })?;

        // Load data from the source
        let (loaded_schema, batches) = data_source.load()?;

        // Determine the final Arrow schema
        let arrow_schema = if self.schema.dimension_count() > 0 || self.schema.measure_count() > 0 {
            // User has explicitly defined dimensions/measures
            // Convert our CubeSchema to ArrowSchema and validate against loaded data
            let expected_schema = Arc::new(self.schema.to_arrow_schema());

            // Validate that the loaded schema is compatible
            validate_schema_compatibility(&expected_schema, &loaded_schema)?;

            // Use the loaded schema to avoid mismatch errors with RecordBatch schemas
            // The validation ensures compatibility between expected and loaded schemas
            loaded_schema
        } else {
            // No explicit schema defined - infer from loaded data
            // We'll treat all columns as dimensions for now
            // Users can explicitly specify measures if they want aggregations
            for field in loaded_schema.fields() {
                let dimension = Dimension::new(field.name(), field.data_type().clone());
                self.schema.add_dimension(dimension)?;
            }

            loaded_schema
        };

        // Create the ElastiCube
        ElastiCube::new(self.schema, arrow_schema, batches)
    }
}

/// Validate that a loaded schema is compatible with the expected schema
///
/// Checks that all expected fields exist in the loaded schema with compatible types
fn validate_schema_compatibility(
    expected: &ArrowSchema,
    loaded: &ArrowSchema,
) -> Result<()> {
    for expected_field in expected.fields() {
        let loaded_field = loaded.field_with_name(expected_field.name()).map_err(|_| {
            Error::schema(format!(
                "Field '{}' not found in loaded data",
                expected_field.name()
            ))
        })?;

        // Check if data types match
        if expected_field.data_type() != loaded_field.data_type() {
            return Err(Error::schema(format!(
                "Field '{}' has incompatible type: expected {:?}, found {:?}",
                expected_field.name(),
                expected_field.data_type(),
                loaded_field.data_type()
            )));
        }
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use arrow::array::{Float64Array, Int32Array, StringArray};
    use arrow::datatypes::Field;
    use std::sync::Arc;

    #[test]
    fn test_builder_creation() {
        let builder = ElastiCubeBuilder::new("test_cube");
        assert_eq!(builder.schema.name(), "test_cube");
    }

    #[test]
    fn test_builder_add_dimension() {
        let builder = ElastiCubeBuilder::new("test")
            .add_dimension("region", DataType::Utf8)
            .unwrap();
        assert!(builder.schema.has_dimension("region"));
    }

    #[test]
    fn test_builder_add_measure() {
        let builder = ElastiCubeBuilder::new("test")
            .add_measure("sales", DataType::Float64, AggFunc::Sum)
            .unwrap();
        assert!(builder.schema.has_measure("sales"));
    }

    #[test]
    fn test_build_without_data_source() {
        let builder = ElastiCubeBuilder::new("test")
            .add_dimension("region", DataType::Utf8)
            .unwrap();

        let result = builder.build();
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("No data source specified"));
    }

    #[test]
    fn test_build_with_record_batches() {
        // Create a simple schema
        let schema = Arc::new(ArrowSchema::new(vec![
            Field::new("id", DataType::Int32, false),
            Field::new("value", DataType::Float64, false),
        ]));

        // Create some data
        let batch = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(Int32Array::from(vec![1, 2, 3])),
                Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0])),
            ],
        )
        .unwrap();

        // Build the cube
        let cube = ElastiCubeBuilder::new("test")
            .load_record_batches(schema, vec![batch])
            .unwrap()
            .build()
            .unwrap();

        assert_eq!(cube.row_count(), 3);
        assert_eq!(cube.dimensions().len(), 2); // Both fields treated as dimensions
    }

    #[test]
    fn test_build_with_explicit_schema() {
        // Create a schema
        let schema = Arc::new(ArrowSchema::new(vec![
            Field::new("region", DataType::Utf8, false),
            Field::new("sales", DataType::Float64, false),
        ]));

        // Create some data
        let batch = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(StringArray::from(vec!["North", "South", "East"])),
                Arc::new(Float64Array::from(vec![100.0, 200.0, 150.0])),
            ],
        )
        .unwrap();

        // Build the cube with explicit dimensions and measures
        let cube = ElastiCubeBuilder::new("sales_cube")
            .add_dimension("region", DataType::Utf8)
            .unwrap()
            .add_measure("sales", DataType::Float64, AggFunc::Sum)
            .unwrap()
            .load_record_batches(schema, vec![batch])
            .unwrap()
            .build()
            .unwrap();

        assert_eq!(cube.row_count(), 3);
        assert_eq!(cube.dimensions().len(), 1);
        assert_eq!(cube.measures().len(), 1);
    }

    #[test]
    fn test_schema_validation_failure() {
        // Create a schema with wrong field names
        let schema = Arc::new(ArrowSchema::new(vec![
            Field::new("wrong_name", DataType::Utf8, false),
            Field::new("sales", DataType::Float64, false),
        ]));

        let batch = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(StringArray::from(vec!["North"])),
                Arc::new(Float64Array::from(vec![100.0])),
            ],
        )
        .unwrap();

        // This should fail because "region" is not in the loaded schema
        let result = ElastiCubeBuilder::new("test")
            .add_dimension("region", DataType::Utf8)
            .unwrap()
            .add_measure("sales", DataType::Float64, AggFunc::Sum)
            .unwrap()
            .load_record_batches(schema, vec![batch])
            .unwrap()
            .build();

        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not found"));
    }
}
805}