dbx_core/engine/schema_builder.rs
1//! Schema Builder - Fluent API for building Arrow Schemas
2//!
3//! This module provides a type-safe, fluent API for building Arrow schemas
4//! without manually constructing Field objects.
5
6use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
7
8/// Schema Builder for constructing Arrow schemas using a fluent API
9///
10/// # Example
11///
12/// ```rust
13/// use dbx_core::SchemaBuilder;
14/// use arrow::datatypes::DataType;
15///
16/// let schema = SchemaBuilder::new()
17/// .id("id")
18/// .text("name")
19/// .int32("age").nullable()
20/// .build();
21/// ```
22#[derive(Debug, Clone)]
23pub struct SchemaBuilder {
24 fields: Vec<Field>,
25}
26
27impl SchemaBuilder {
28 /// Create a new SchemaBuilder
29 ///
30 /// # Example
31 ///
32 /// ```rust
33 /// use dbx_core::SchemaBuilder;
34 ///
35 /// let builder = SchemaBuilder::new();
36 /// ```
37 pub fn new() -> Self {
38 Self { fields: Vec::new() }
39 }
40
41 /// Add a column with explicit type and nullability
42 ///
43 /// # Example
44 ///
45 /// ```rust
46 /// use dbx_core::SchemaBuilder;
47 /// use arrow::datatypes::DataType;
48 ///
49 /// let schema = SchemaBuilder::new()
50 /// .column("id", DataType::Int64, false)
51 /// .column("name", DataType::Utf8, true)
52 /// .build();
53 /// ```
54 pub fn column(mut self, name: &str, data_type: DataType, nullable: bool) -> Self {
55 self.fields.push(Field::new(name, data_type, nullable));
56 self
57 }
58
59 /// Build the final Schema
60 ///
61 /// # Example
62 ///
63 /// ```rust
64 /// use dbx_core::SchemaBuilder;
65 ///
66 /// let schema = SchemaBuilder::new()
67 /// .id("id")
68 /// .text("name")
69 /// .build();
70 /// ```
71 pub fn build(self) -> Schema {
72 Schema::new(self.fields)
73 }
74
75 // ========== Type-specific convenience methods ==========
76
77 /// Add an ID column (Int64, NOT NULL)
78 ///
79 /// # Example
80 ///
81 /// ```rust
82 /// use dbx_core::SchemaBuilder;
83 ///
84 /// let schema = SchemaBuilder::new()
85 /// .id("id")
86 /// .build();
87 /// ```
88 pub fn id(self, name: &str) -> Self {
89 self.column(name, DataType::Int64, false)
90 }
91
92 /// Add a text column (Utf8, nullable by default)
93 ///
94 /// # Example
95 ///
96 /// ```rust
97 /// use dbx_core::SchemaBuilder;
98 ///
99 /// let schema = SchemaBuilder::new()
100 /// .text("name")
101 /// .build();
102 /// ```
103 pub fn text(self, name: &str) -> Self {
104 self.column(name, DataType::Utf8, true)
105 }
106
107 /// Add an Int32 column (nullable by default)
108 ///
109 /// # Example
110 ///
111 /// ```rust
112 /// use dbx_core::SchemaBuilder;
113 ///
114 /// let schema = SchemaBuilder::new()
115 /// .int32("age")
116 /// .build();
117 /// ```
118 pub fn int32(self, name: &str) -> Self {
119 self.column(name, DataType::Int32, true)
120 }
121
122 /// Add an Int64 column (nullable by default)
123 ///
124 /// # Example
125 ///
126 /// ```rust
127 /// use dbx_core::SchemaBuilder;
128 ///
129 /// let schema = SchemaBuilder::new()
130 /// .int64("user_id")
131 /// .build();
132 /// ```
133 pub fn int64(self, name: &str) -> Self {
134 self.column(name, DataType::Int64, true)
135 }
136
137 /// Add a Float64 column (nullable by default)
138 ///
139 /// # Example
140 ///
141 /// ```rust
142 /// use dbx_core::SchemaBuilder;
143 ///
144 /// let schema = SchemaBuilder::new()
145 /// .float64("salary")
146 /// .build();
147 /// ```
148 pub fn float64(self, name: &str) -> Self {
149 self.column(name, DataType::Float64, true)
150 }
151
152 /// Add a Boolean column (nullable by default)
153 ///
154 /// # Example
155 ///
156 /// ```rust
157 /// use dbx_core::SchemaBuilder;
158 ///
159 /// let schema = SchemaBuilder::new()
160 /// .boolean("is_active")
161 /// .build();
162 /// ```
163 pub fn boolean(self, name: &str) -> Self {
164 self.column(name, DataType::Boolean, true)
165 }
166
167 /// Add a Timestamp column (nullable by default)
168 ///
169 /// # Example
170 ///
171 /// ```rust
172 /// use dbx_core::SchemaBuilder;
173 ///
174 /// let schema = SchemaBuilder::new()
175 /// .timestamp("created_at")
176 /// .build();
177 /// ```
178 pub fn timestamp(self, name: &str) -> Self {
179 self.column(name, DataType::Timestamp(TimeUnit::Millisecond, None), true)
180 }
181
182 // ========== Nullability control methods ==========
183
184 /// Make the last added field nullable
185 ///
186 /// # Example
187 ///
188 /// ```rust
189 /// use dbx_core::SchemaBuilder;
190 ///
191 /// let schema = SchemaBuilder::new()
192 /// .int32("age").nullable()
193 /// .build();
194 /// ```
195 pub fn nullable(mut self) -> Self {
196 if let Some(field) = self.fields.last_mut() {
197 *field = Field::new(field.name(), field.data_type().clone(), true);
198 }
199 self
200 }
201
202 /// Make the last added field NOT NULL
203 ///
204 /// # Example
205 ///
206 /// ```rust
207 /// use dbx_core::SchemaBuilder;
208 ///
209 /// let schema = SchemaBuilder::new()
210 /// .text("email").not_null()
211 /// .build();
212 /// ```
213 pub fn not_null(mut self) -> Self {
214 if let Some(field) = self.fields.last_mut() {
215 *field = Field::new(field.name(), field.data_type().clone(), false);
216 }
217 self
218 }
219}
220
221impl Default for SchemaBuilder {
222 fn default() -> Self {
223 Self::new()
224 }
225}