Skip to main content

hdbconnect_arrow/builders/
factory.rs

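//! Builder factory that maps Arrow data types to HANA-compatible builders.
//!
//! The factory ensures that each builder is created with the capacity and
//! configuration appropriate for its Arrow data type, and hands it back as a
//! boxed [`HanaCompatibleBuilder`] trait object.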
5
6use arrow_schema::{DataType, TimeUnit};
7
8use super::boolean::BooleanBuilderWrapper;
9use super::decimal::Decimal128BuilderWrapper;
10use super::primitive::{
11    Float32BuilderWrapper, Float64BuilderWrapper, Int16BuilderWrapper, Int32BuilderWrapper,
12    Int64BuilderWrapper, UInt8BuilderWrapper,
13};
14use super::string::{
15    BinaryBuilderWrapper, FixedSizeBinaryBuilderWrapper, LargeBinaryBuilderWrapper,
16    LargeStringBuilderWrapper, StringBuilderWrapper,
17};
18use super::temporal::{
19    Date32BuilderWrapper, Time64NanosecondBuilderWrapper, TimestampNanosecondBuilderWrapper,
20};
21use crate::traits::builder::HanaCompatibleBuilder;
22use crate::traits::streaming::BatchConfig;
23
24/// Factory for creating type-safe Arrow builders.
25///
26/// The factory ensures builders are created with appropriate capacity
27/// and configuration for each Arrow data type.
/// Sizing hints used to construct Arrow builders.
///
/// A factory is a small, cheaply cloneable bundle of pre-allocation sizes
/// plus an optional LOB size limit. It holds no builders itself.
#[derive(Clone, Debug)]
pub struct BuilderFactory {
    /// Row count reserved up front in every builder.
    capacity: usize,
    /// Byte budget reserved up front for string payloads.
    string_capacity: usize,
    /// Byte budget reserved up front for binary payloads.
    binary_capacity: usize,
    /// Optional upper bound on LOB size in bytes; `None` leaves it unset.
    max_lob_bytes: Option<usize>,
}
39
40impl BuilderFactory {
41    /// Create a new factory with the specified row capacity.
42    #[must_use]
43    pub const fn new(capacity: usize) -> Self {
44        Self {
45            capacity,
46            string_capacity: capacity * 32, // Estimate 32 bytes per string
47            binary_capacity: capacity * 64, // Estimate 64 bytes per binary
48            max_lob_bytes: None,
49        }
50    }
51
52    /// Create from `BatchConfig`.
53    #[must_use]
54    #[allow(clippy::missing_const_for_fn)]
55    pub fn from_config(config: &BatchConfig) -> Self {
56        Self {
57            capacity: config.batch_size.get(),
58            string_capacity: config.string_capacity,
59            binary_capacity: config.binary_capacity,
60            max_lob_bytes: config.max_lob_bytes,
61        }
62    }
63
64    /// Set the string data capacity.
65    #[must_use]
66    pub const fn with_string_capacity(mut self, capacity: usize) -> Self {
67        self.string_capacity = capacity;
68        self
69    }
70
71    /// Set the binary data capacity.
72    #[must_use]
73    pub const fn with_binary_capacity(mut self, capacity: usize) -> Self {
74        self.binary_capacity = capacity;
75        self
76    }
77
78    /// Set the maximum LOB size in bytes.
79    #[must_use]
80    pub const fn with_max_lob_bytes(mut self, max: Option<usize>) -> Self {
81        self.max_lob_bytes = max;
82        self
83    }
84
85    /// Create a builder for the specified Arrow data type.
86    ///
87    /// Returns a boxed trait object that implements `HanaCompatibleBuilder`.
88    ///
89    /// # Panics
90    ///
91    /// Panics if the data type is not supported (should not happen if using
92    /// `hana_type_to_arrow` for type mapping).
93    #[must_use]
94    #[allow(clippy::match_same_arms)] // Intentional: explicit Utf8 case for clarity
95    pub fn create_builder(&self, data_type: &DataType) -> Box<dyn HanaCompatibleBuilder> {
96        match data_type {
97            // Primitive numeric types
98            DataType::UInt8 => Box::new(UInt8BuilderWrapper::new(self.capacity)),
99            DataType::Int16 => Box::new(Int16BuilderWrapper::new(self.capacity)),
100            DataType::Int32 => Box::new(Int32BuilderWrapper::new(self.capacity)),
101            DataType::Int64 => Box::new(Int64BuilderWrapper::new(self.capacity)),
102            DataType::Float32 => Box::new(Float32BuilderWrapper::new(self.capacity)),
103            DataType::Float64 => Box::new(Float64BuilderWrapper::new(self.capacity)),
104
105            // Decimal
106            DataType::Decimal128(precision, scale) => Box::new(Decimal128BuilderWrapper::new(
107                self.capacity,
108                *precision,
109                *scale,
110            )),
111
112            // Strings
113            DataType::Utf8 => Box::new(StringBuilderWrapper::new(
114                self.capacity,
115                self.string_capacity,
116            )),
117            DataType::LargeUtf8 => {
118                let mut builder =
119                    LargeStringBuilderWrapper::new(self.capacity, self.string_capacity);
120                if let Some(max) = self.max_lob_bytes {
121                    builder = builder.with_max_lob_bytes(max);
122                }
123                Box::new(builder)
124            }
125
126            // Binary
127            DataType::Binary => Box::new(BinaryBuilderWrapper::new(
128                self.capacity,
129                self.binary_capacity,
130            )),
131            DataType::LargeBinary => {
132                let mut builder =
133                    LargeBinaryBuilderWrapper::new(self.capacity, self.binary_capacity);
134                if let Some(max) = self.max_lob_bytes {
135                    builder = builder.with_max_lob_bytes(max);
136                }
137                Box::new(builder)
138            }
139            DataType::FixedSizeBinary(size) => {
140                Box::new(FixedSizeBinaryBuilderWrapper::new(self.capacity, *size))
141            }
142
143            // Temporal
144            DataType::Date32 => Box::new(Date32BuilderWrapper::new(self.capacity)),
145            DataType::Time64(TimeUnit::Nanosecond) => {
146                Box::new(Time64NanosecondBuilderWrapper::new(self.capacity))
147            }
148            DataType::Timestamp(TimeUnit::Nanosecond, None) => {
149                Box::new(TimestampNanosecondBuilderWrapper::new(self.capacity))
150            }
151
152            // Boolean
153            DataType::Boolean => Box::new(BooleanBuilderWrapper::new(self.capacity)),
154
155            // Unsupported - fallback to string
156            _ => Box::new(StringBuilderWrapper::new(
157                self.capacity,
158                self.string_capacity,
159            )),
160        }
161    }
162
163    /// Create builders for all fields in a schema.
164    ///
165    /// Returns a vector of boxed builders in the same order as schema fields.
166    #[must_use]
167    pub fn create_builders_for_schema(
168        &self,
169        schema: &arrow_schema::Schema,
170    ) -> Vec<Box<dyn HanaCompatibleBuilder>> {
171        let fields = schema.fields();
172        let mut builders = Vec::with_capacity(fields.len());
173        for field in fields {
174            builders.push(self.create_builder(field.data_type()));
175        }
176        builders
177    }
178}
179
180impl Default for BuilderFactory {
181    fn default() -> Self {
182        Self::new(1024)
183    }
184}
185
#[cfg(test)]
mod tests {
    use arrow_schema::{DataType, Field, Schema, TimeUnit};

    use super::*;

    /// Row capacity used by most tests.
    const ROWS: usize = 100;

    /// Create a builder for `data_type` from `factory` and check it holds no rows yet.
    fn assert_fresh_builder_is_empty(factory: &BuilderFactory, data_type: &DataType) {
        let builder = factory.create_builder(data_type);
        assert_eq!(builder.len(), 0);
    }

    /// Same check as above, using a plain `BuilderFactory::new(ROWS)`.
    fn assert_plain_factory_builder_is_empty(data_type: &DataType) {
        assert_fresh_builder_is_empty(&BuilderFactory::new(ROWS), data_type);
    }

    /// Number of builders a `BuilderFactory::new(ROWS)` creates for the given fields.
    fn builder_count_for(fields: Vec<Field>) -> usize {
        let schema = Schema::new(fields);
        BuilderFactory::new(ROWS)
            .create_builders_for_schema(&schema)
            .len()
    }

    // ---------------------------------------------------------------------
    // Factory construction
    // ---------------------------------------------------------------------

    #[test]
    fn test_factory_creation() {
        let factory = BuilderFactory::new(100);
        assert_eq!(factory.capacity, 100);
        assert_eq!(factory.string_capacity, 3200);
        assert_eq!(factory.binary_capacity, 6400);
        assert!(factory.max_lob_bytes.is_none());
    }

    #[test]
    fn test_factory_default() {
        let factory = BuilderFactory::default();
        assert_eq!(factory.capacity, 1024);
        assert_eq!(factory.string_capacity, 1024 * 32);
        assert_eq!(factory.binary_capacity, 1024 * 64);
        assert!(factory.max_lob_bytes.is_none());
    }

    #[test]
    fn test_factory_from_config() {
        let config = BatchConfig::with_batch_size(500)
            .string_capacity(10000)
            .binary_capacity(20000)
            .max_lob_bytes(Some(50_000_000));
        let factory = BuilderFactory::from_config(&config);

        assert_eq!(factory.capacity, 500);
        assert_eq!(factory.string_capacity, 10000);
        assert_eq!(factory.binary_capacity, 20000);
        assert_eq!(factory.max_lob_bytes, Some(50_000_000));
    }

    #[test]
    fn test_factory_from_config_without_lob_limit() {
        let config = BatchConfig::with_batch_size(500)
            .string_capacity(10000)
            .binary_capacity(20000);

        assert!(BuilderFactory::from_config(&config).max_lob_bytes.is_none());
    }

    #[test]
    fn test_factory_with_string_capacity() {
        let factory = BuilderFactory::new(100).with_string_capacity(5000);
        assert_eq!(factory.capacity, 100);
        assert_eq!(factory.string_capacity, 5000);
        assert_eq!(factory.binary_capacity, 6400);
    }

    #[test]
    fn test_factory_with_binary_capacity() {
        let factory = BuilderFactory::new(100).with_binary_capacity(8000);
        assert_eq!(factory.capacity, 100);
        assert_eq!(factory.string_capacity, 3200);
        assert_eq!(factory.binary_capacity, 8000);
    }

    #[test]
    fn test_factory_with_max_lob_bytes() {
        let factory = BuilderFactory::new(100).with_max_lob_bytes(Some(10_000_000));
        assert_eq!(factory.max_lob_bytes, Some(10_000_000));
    }

    #[test]
    fn test_factory_builder_chaining() {
        let factory = BuilderFactory::new(200)
            .with_string_capacity(1000)
            .with_binary_capacity(2000)
            .with_max_lob_bytes(Some(5_000_000));

        assert_eq!(factory.capacity, 200);
        assert_eq!(factory.string_capacity, 1000);
        assert_eq!(factory.binary_capacity, 2000);
        assert_eq!(factory.max_lob_bytes, Some(5_000_000));
    }

    // ---------------------------------------------------------------------
    // Primitive builders
    // ---------------------------------------------------------------------

    #[test]
    fn test_create_primitive_builders() {
        // Smoke test: creation alone must succeed for these types.
        let factory = BuilderFactory::new(100);
        for data_type in [DataType::Int32, DataType::Float64, DataType::Utf8] {
            let _ = factory.create_builder(&data_type);
        }
    }

    #[test]
    fn test_create_uint8_builder() {
        assert_plain_factory_builder_is_empty(&DataType::UInt8);
    }

    #[test]
    fn test_create_int16_builder() {
        assert_plain_factory_builder_is_empty(&DataType::Int16);
    }

    #[test]
    fn test_create_int32_builder() {
        assert_plain_factory_builder_is_empty(&DataType::Int32);
    }

    #[test]
    fn test_create_int64_builder() {
        assert_plain_factory_builder_is_empty(&DataType::Int64);
    }

    #[test]
    fn test_create_float32_builder() {
        assert_plain_factory_builder_is_empty(&DataType::Float32);
    }

    #[test]
    fn test_create_float64_builder() {
        assert_plain_factory_builder_is_empty(&DataType::Float64);
    }

    // ---------------------------------------------------------------------
    // Decimal builders
    // ---------------------------------------------------------------------

    #[test]
    fn test_create_decimal_builder() {
        assert_plain_factory_builder_is_empty(&DataType::Decimal128(18, 2));
    }

    #[test]
    fn test_create_decimal_builder_high_precision() {
        assert_plain_factory_builder_is_empty(&DataType::Decimal128(38, 10));
    }

    #[test]
    fn test_create_decimal_builder_low_precision() {
        assert_plain_factory_builder_is_empty(&DataType::Decimal128(1, 0));
    }

    // ---------------------------------------------------------------------
    // String and binary builders
    // ---------------------------------------------------------------------

    #[test]
    fn test_create_utf8_builder() {
        assert_plain_factory_builder_is_empty(&DataType::Utf8);
    }

    #[test]
    fn test_create_large_utf8_builder() {
        assert_plain_factory_builder_is_empty(&DataType::LargeUtf8);
    }

    #[test]
    fn test_create_large_utf8_builder_with_lob_limit() {
        let limited = BuilderFactory::new(100).with_max_lob_bytes(Some(1_000_000));
        assert_fresh_builder_is_empty(&limited, &DataType::LargeUtf8);
    }

    #[test]
    fn test_create_binary_builder() {
        assert_plain_factory_builder_is_empty(&DataType::Binary);
    }

    #[test]
    fn test_create_large_binary_builder() {
        assert_plain_factory_builder_is_empty(&DataType::LargeBinary);
    }

    #[test]
    fn test_create_large_binary_builder_with_lob_limit() {
        let limited = BuilderFactory::new(100).with_max_lob_bytes(Some(1_000_000));
        assert_fresh_builder_is_empty(&limited, &DataType::LargeBinary);
    }

    #[test]
    fn test_create_fixed_size_binary_builder() {
        assert_plain_factory_builder_is_empty(&DataType::FixedSizeBinary(8));
    }

    #[test]
    fn test_create_fixed_size_binary_builder_various_sizes() {
        let factory = BuilderFactory::new(100);
        for width in [8, 12, 16] {
            assert_fresh_builder_is_empty(&factory, &DataType::FixedSizeBinary(width));
        }
    }

    // ---------------------------------------------------------------------
    // Temporal builders
    // ---------------------------------------------------------------------

    #[test]
    fn test_create_date32_builder() {
        assert_plain_factory_builder_is_empty(&DataType::Date32);
    }

    #[test]
    fn test_create_time64_nanosecond_builder() {
        assert_plain_factory_builder_is_empty(&DataType::Time64(TimeUnit::Nanosecond));
    }

    #[test]
    fn test_create_timestamp_nanosecond_builder() {
        assert_plain_factory_builder_is_empty(&DataType::Timestamp(TimeUnit::Nanosecond, None));
    }

    // ---------------------------------------------------------------------
    // Boolean builder
    // ---------------------------------------------------------------------

    #[test]
    fn test_create_boolean_builder() {
        assert_plain_factory_builder_is_empty(&DataType::Boolean);
    }

    // ---------------------------------------------------------------------
    // Fallback for types without a dedicated builder
    // ---------------------------------------------------------------------

    #[test]
    fn test_create_builder_unsupported_falls_back_to_string() {
        assert_plain_factory_builder_is_empty(&DataType::Duration(TimeUnit::Second));
    }

    // ---------------------------------------------------------------------
    // Schema-wide builder creation
    // ---------------------------------------------------------------------

    #[test]
    fn test_create_builders_for_schema() {
        let count = builder_count_for(vec![
            Field::new("id", DataType::Int32, false),
            Field::new("name", DataType::Utf8, true),
            Field::new("price", DataType::Decimal128(18, 2), false),
        ]);
        assert_eq!(count, 3);
    }

    #[test]
    fn test_create_builders_for_empty_schema() {
        assert_eq!(builder_count_for(Vec::new()), 0);
    }

    #[test]
    fn test_create_builders_for_single_field_schema() {
        let count = builder_count_for(vec![Field::new("id", DataType::Int32, false)]);
        assert_eq!(count, 1);
    }

    #[test]
    fn test_create_builders_for_complex_schema() {
        let count = builder_count_for(vec![
            Field::new("id", DataType::Int64, false),
            Field::new("name", DataType::Utf8, true),
            Field::new("price", DataType::Decimal128(18, 2), false),
            Field::new("is_active", DataType::Boolean, false),
            Field::new("created_at", DataType::Date32, true),
            Field::new(
                "updated_at",
                DataType::Timestamp(TimeUnit::Nanosecond, None),
                true,
            ),
            Field::new("data", DataType::Binary, true),
            Field::new("notes", DataType::LargeUtf8, true),
        ]);
        assert_eq!(count, 8);
    }

    #[test]
    fn test_create_builders_all_numeric_types() {
        let numeric_columns = [
            ("tiny", DataType::UInt8),
            ("small", DataType::Int16),
            ("int", DataType::Int32),
            ("big", DataType::Int64),
            ("real", DataType::Float32),
            ("double", DataType::Float64),
        ];
        let fields: Vec<Field> = numeric_columns
            .into_iter()
            .map(|(name, data_type)| Field::new(name, data_type, false))
            .collect();
        let schema = Schema::new(fields);

        let builders = BuilderFactory::new(100).create_builders_for_schema(&schema);
        assert_eq!(builders.len(), 6);
        builders
            .iter()
            .for_each(|builder| assert_eq!(builder.len(), 0));
    }

    // ---------------------------------------------------------------------
    // Derived Clone / Debug
    // ---------------------------------------------------------------------

    #[test]
    fn test_factory_clone() {
        let source = BuilderFactory::new(100)
            .with_string_capacity(5000)
            .with_binary_capacity(10000)
            .with_max_lob_bytes(Some(1_000_000));
        let copy = source.clone();

        assert_eq!(source.capacity, copy.capacity);
        assert_eq!(source.string_capacity, copy.string_capacity);
        assert_eq!(source.binary_capacity, copy.binary_capacity);
        assert_eq!(source.max_lob_bytes, copy.max_lob_bytes);
    }

    #[test]
    fn test_factory_debug() {
        let rendered = format!("{:?}", BuilderFactory::new(100));
        for needle in ["BuilderFactory", "capacity"] {
            assert!(rendered.contains(needle));
        }
    }
}