Skip to main content

dbx_core/engine/
schema_builder.rs

1//! Schema Builder - Fluent API for building Arrow Schemas
2//!
3//! This module provides a type-safe, fluent API for building Arrow schemas
4//! without manually constructing Field objects.
5
6use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
7
8/// Schema Builder for constructing Arrow schemas using a fluent API
9///
10/// # Example
11///
12/// ```rust
13/// use dbx_core::SchemaBuilder;
14/// use arrow::datatypes::DataType;
15///
16/// let schema = SchemaBuilder::new()
17///     .id("id")
18///     .text("name")
19///     .int32("age").nullable()
20///     .build();
21/// ```
22#[derive(Debug, Clone)]
23pub struct SchemaBuilder {
24    fields: Vec<Field>,
25}
26
27impl SchemaBuilder {
28    /// Create a new SchemaBuilder
29    ///
30    /// # Example
31    ///
32    /// ```rust
33    /// use dbx_core::SchemaBuilder;
34    ///
35    /// let builder = SchemaBuilder::new();
36    /// ```
37    pub fn new() -> Self {
38        Self { fields: Vec::new() }
39    }
40
41    /// Add a column with explicit type and nullability
42    ///
43    /// # Example
44    ///
45    /// ```rust
46    /// use dbx_core::SchemaBuilder;
47    /// use arrow::datatypes::DataType;
48    ///
49    /// let schema = SchemaBuilder::new()
50    ///     .column("id", DataType::Int64, false)
51    ///     .column("name", DataType::Utf8, true)
52    ///     .build();
53    /// ```
54    pub fn column(mut self, name: &str, data_type: DataType, nullable: bool) -> Self {
55        self.fields.push(Field::new(name, data_type, nullable));
56        self
57    }
58
59    /// Build the final Schema
60    ///
61    /// # Example
62    ///
63    /// ```rust
64    /// use dbx_core::SchemaBuilder;
65    ///
66    /// let schema = SchemaBuilder::new()
67    ///     .id("id")
68    ///     .text("name")
69    ///     .build();
70    /// ```
71    pub fn build(self) -> Schema {
72        Schema::new(self.fields)
73    }
74
75    // ========== Type-specific convenience methods ==========
76
77    /// Add an ID column (Int64, NOT NULL)
78    ///
79    /// # Example
80    ///
81    /// ```rust
82    /// use dbx_core::SchemaBuilder;
83    ///
84    /// let schema = SchemaBuilder::new()
85    ///     .id("id")
86    ///     .build();
87    /// ```
88    pub fn id(self, name: &str) -> Self {
89        self.column(name, DataType::Int64, false)
90    }
91
92    /// Add a text column (Utf8, nullable by default)
93    ///
94    /// # Example
95    ///
96    /// ```rust
97    /// use dbx_core::SchemaBuilder;
98    ///
99    /// let schema = SchemaBuilder::new()
100    ///     .text("name")
101    ///     .build();
102    /// ```
103    pub fn text(self, name: &str) -> Self {
104        self.column(name, DataType::Utf8, true)
105    }
106
107    /// Add an Int32 column (nullable by default)
108    ///
109    /// # Example
110    ///
111    /// ```rust
112    /// use dbx_core::SchemaBuilder;
113    ///
114    /// let schema = SchemaBuilder::new()
115    ///     .int32("age")
116    ///     .build();
117    /// ```
118    pub fn int32(self, name: &str) -> Self {
119        self.column(name, DataType::Int32, true)
120    }
121
122    /// Add an Int64 column (nullable by default)
123    ///
124    /// # Example
125    ///
126    /// ```rust
127    /// use dbx_core::SchemaBuilder;
128    ///
129    /// let schema = SchemaBuilder::new()
130    ///     .int64("user_id")
131    ///     .build();
132    /// ```
133    pub fn int64(self, name: &str) -> Self {
134        self.column(name, DataType::Int64, true)
135    }
136
137    /// Add a Float64 column (nullable by default)
138    ///
139    /// # Example
140    ///
141    /// ```rust
142    /// use dbx_core::SchemaBuilder;
143    ///
144    /// let schema = SchemaBuilder::new()
145    ///     .float64("salary")
146    ///     .build();
147    /// ```
148    pub fn float64(self, name: &str) -> Self {
149        self.column(name, DataType::Float64, true)
150    }
151
152    /// Add a Boolean column (nullable by default)
153    ///
154    /// # Example
155    ///
156    /// ```rust
157    /// use dbx_core::SchemaBuilder;
158    ///
159    /// let schema = SchemaBuilder::new()
160    ///     .boolean("is_active")
161    ///     .build();
162    /// ```
163    pub fn boolean(self, name: &str) -> Self {
164        self.column(name, DataType::Boolean, true)
165    }
166
167    /// Add a Timestamp column (nullable by default)
168    ///
169    /// # Example
170    ///
171    /// ```rust
172    /// use dbx_core::SchemaBuilder;
173    ///
174    /// let schema = SchemaBuilder::new()
175    ///     .timestamp("created_at")
176    ///     .build();
177    /// ```
178    pub fn timestamp(self, name: &str) -> Self {
179        self.column(name, DataType::Timestamp(TimeUnit::Millisecond, None), true)
180    }
181
182    // ========== Nullability control methods ==========
183
184    /// Make the last added field nullable
185    ///
186    /// # Example
187    ///
188    /// ```rust
189    /// use dbx_core::SchemaBuilder;
190    ///
191    /// let schema = SchemaBuilder::new()
192    ///     .int32("age").nullable()
193    ///     .build();
194    /// ```
195    pub fn nullable(mut self) -> Self {
196        if let Some(field) = self.fields.last_mut() {
197            *field = Field::new(field.name(), field.data_type().clone(), true);
198        }
199        self
200    }
201
202    /// Make the last added field NOT NULL
203    ///
204    /// # Example
205    ///
206    /// ```rust
207    /// use dbx_core::SchemaBuilder;
208    ///
209    /// let schema = SchemaBuilder::new()
210    ///     .text("email").not_null()
211    ///     .build();
212    /// ```
213    pub fn not_null(mut self) -> Self {
214        if let Some(field) = self.fields.last_mut() {
215            *field = Field::new(field.name(), field.data_type().clone(), false);
216        }
217        self
218    }
219}
220
221impl Default for SchemaBuilder {
222    fn default() -> Self {
223        Self::new()
224    }
225}