Skip to main content

hdbconnect_arrow/types/
arrow.rs

1//! Arrow type mappings from HANA types.
2//!
3//! This module provides the authoritative mapping between HANA SQL types
4//! and Apache Arrow types.
5//!
6//! # Type Mapping Table
7//!
8//! | HANA Type | Arrow Type | Notes |
9//! |-----------|------------|-------|
10//! | TINYINT | UInt8 | Unsigned in HANA |
11//! | SMALLINT | Int16 | |
12//! | INT | Int32 | |
13//! | BIGINT | Int64 | |
14//! | REAL | Float32 | |
15//! | DOUBLE | Float64 | |
16//! | DECIMAL(p,s) | Decimal128(p,s) | Full precision |
17//! | CHAR/VARCHAR | Utf8 | |
18//! | NCHAR/NVARCHAR | Utf8 | Unicode strings |
19//! | CLOB/NCLOB | LargeUtf8 | Large strings |
20//! | BLOB | LargeBinary | Large binary |
21//! | DAYDATE | Date32 | Days since epoch |
22//! | SECONDTIME | Time64(Nanosecond) | |
23//! | LONGDATE/SECONDDATE | Timestamp(Nanosecond, None) | |
24//! | BOOLEAN | Boolean | |
25//! | GEOMETRY/POINT | Binary | WKB format |
26
27use arrow_schema::{DataType, Field, TimeUnit};
28use hdbconnect::TypeId;
29
30/// Convert HANA `TypeId` to Arrow `DataType`.
31///
32/// This is the authoritative mapping between HANA SQL types and Arrow types.
33/// The mapping prioritizes:
34/// 1. Precision preservation (especially for decimals)
35/// 2. Zero-copy compatibility with Polars/pandas
36/// 3. Consistent handling of nullable values
37///
38/// # Arguments
39///
40/// * `type_id` - The HANA type identifier
41/// * `precision` - Optional precision for DECIMAL types
42/// * `scale` - Optional scale for DECIMAL types
43///
44/// # Returns
45///
46/// The corresponding Arrow `DataType`.
47#[must_use]
48#[allow(clippy::match_same_arms)] // Intentional: semantic separation of GEOMETRY vs BINARY
49pub fn hana_type_to_arrow(type_id: TypeId, precision: Option<u8>, scale: Option<i8>) -> DataType {
50    match type_id {
51        // Integer types
52        TypeId::TINYINT => DataType::UInt8, // HANA TINYINT is unsigned
53        TypeId::SMALLINT => DataType::Int16,
54        TypeId::INT => DataType::Int32,
55        TypeId::BIGINT => DataType::Int64,
56
57        // Floating point types
58        TypeId::REAL => DataType::Float32,
59        TypeId::DOUBLE => DataType::Float64,
60
61        // Decimal types - preserve precision and scale
62        // Note: SMALLDECIMAL is mapped to DECIMAL in hdbconnect 0.32+
63        TypeId::DECIMAL => {
64            let p = precision.unwrap_or(38).min(38);
65            let s = scale.unwrap_or(0);
66            DataType::Decimal128(p, s)
67        }
68
69        // String types - all map to UTF-8
70        TypeId::CHAR
71        | TypeId::VARCHAR
72        | TypeId::NCHAR
73        | TypeId::NVARCHAR
74        | TypeId::SHORTTEXT
75        | TypeId::ALPHANUM
76        | TypeId::STRING => DataType::Utf8,
77
78        // Binary types
79        TypeId::BINARY | TypeId::VARBINARY => DataType::Binary,
80
81        // LOB types - use Large variants for potentially huge data
82        TypeId::CLOB | TypeId::NCLOB | TypeId::TEXT => DataType::LargeUtf8,
83        TypeId::BLOB => DataType::LargeBinary,
84
85        // Temporal types
86        // Note: DATE/TIME/TIMESTAMP are deprecated in hdbconnect 0.32+
87        // Using DAYDATE, SECONDTIME, LONGDATE, SECONDDATE instead
88        TypeId::DAYDATE => DataType::Date32,
89        TypeId::SECONDTIME => DataType::Time64(TimeUnit::Nanosecond),
90        TypeId::SECONDDATE | TypeId::LONGDATE => DataType::Timestamp(TimeUnit::Nanosecond, None),
91
92        // Boolean
93        TypeId::BOOLEAN => DataType::Boolean,
94
95        // Fixed-size binary types (HANA specific)
96        TypeId::FIXED8 => DataType::FixedSizeBinary(8),
97        TypeId::FIXED12 => DataType::FixedSizeBinary(12),
98        TypeId::FIXED16 => DataType::FixedSizeBinary(16),
99
100        // Spatial types - serialize as WKB binary
101        TypeId::GEOMETRY | TypeId::POINT => DataType::Binary,
102
103        // Unknown/unsupported - fallback to string representation
104        _ => DataType::Utf8,
105    }
106}
107
108/// Create an Arrow Field from HANA column metadata.
109///
110/// # Arguments
111///
112/// * `name` - Column name
113/// * `type_id` - HANA type identifier
114/// * `nullable` - Whether the column allows NULL values
115/// * `precision` - Optional precision for DECIMAL types
116/// * `scale` - Optional scale for DECIMAL types
117#[must_use]
118pub fn hana_field_to_arrow(
119    name: &str,
120    type_id: TypeId,
121    nullable: bool,
122    precision: Option<u8>,
123    scale: Option<i8>,
124) -> Field {
125    Field::new(
126        name,
127        hana_type_to_arrow(type_id, precision, scale),
128        nullable,
129    )
130}
131
132/// Extension trait for hdbconnect `FieldMetadata`.
133///
134/// Provides convenient conversion methods for HANA metadata to Arrow types.
135pub trait FieldMetadataExt {
136    /// Convert to Arrow Field.
137    fn to_arrow_field(&self) -> Field;
138
139    /// Get the Arrow `DataType` for this field.
140    fn arrow_data_type(&self) -> DataType;
141}
142
/// Arrow conversions for the `hdbconnect_async` `FieldMetadata`.
///
/// Only compiled when the `async` feature is enabled. Implementors expose
/// their HANA column metadata either as a bare Arrow `DataType` or as a
/// complete Arrow `Field`.
#[cfg(feature = "async")]
pub trait FieldMetadataExtAsync {
    /// Arrow `DataType` that corresponds to this field's HANA type.
    fn arrow_data_type(&self) -> DataType;

    /// Arrow `Field` describing this column.
    fn to_arrow_field(&self) -> Field;
}
154
/// Internal macro to implement a `FieldMetadataExt`-style trait for one of the
/// `FieldMetadata` types.
///
/// Both `hdbconnect::FieldMetadata` and `hdbconnect_async::FieldMetadata` have
/// identical interfaces, so we use a macro to avoid code duplication.
///
/// The generated impl relies on `$type` providing `displayname()`,
/// `columnname()`, `type_id()`, `is_nullable()`, `precision()` and `scale()`,
/// and on `$trait_name` declaring `to_arrow_field` and `arrow_data_type`.
macro_rules! impl_field_metadata_ext {
    ($trait_name:ident for $type:ty) => {
        impl $trait_name for $type {
            fn to_arrow_field(&self) -> Field {
                // Use the display name, falling back to the column name when
                // the display name is empty.
                let name = {
                    let display = self.displayname();
                    if display.is_empty() {
                        self.columnname()
                    } else {
                        display
                    }
                };

                // The data type is derived in exactly one place
                // (`arrow_data_type`). The call is fully qualified so it stays
                // unambiguous even when another trait in scope declares a
                // method with the same name for this type.
                Field::new(
                    name,
                    <Self as $trait_name>::arrow_data_type(self),
                    self.is_nullable(),
                )
            }

            fn arrow_data_type(&self) -> DataType {
                // Checked narrowing instead of `as` casts: a precision outside
                // 0..=255 is treated as "not specified".
                let precision =
                    <u8 as ::std::convert::TryFrom<_>>::try_from(self.precision()).ok();

                // A scale outside 0..=127 (negative or too large) is likewise
                // treated as "not specified".
                let scale = <i8 as ::std::convert::TryFrom<_>>::try_from(self.scale())
                    .ok()
                    .filter(|s| *s >= 0);

                hana_type_to_arrow(self.type_id(), precision, scale)
            }
        }
    };
}
210
211// Apply macro for sync version
212impl_field_metadata_ext!(FieldMetadataExt for hdbconnect::FieldMetadata);
213
214// Apply macro for async version
215#[cfg(feature = "async")]
216impl_field_metadata_ext!(FieldMetadataExtAsync for hdbconnect_async::FieldMetadata);
217
218/// Get the HANA type category for a `TypeId`.
219///
220/// Returns the category name as a static string.
221#[must_use]
222pub const fn type_category(type_id: TypeId) -> &'static str {
223    super::conversion::TypeCategory::from_type_id(type_id).as_str()
224}
225
#[cfg(test)]
mod tests {
    use super::*;

    /// Maps `type_id` without supplying precision or scale.
    fn plain(type_id: TypeId) -> DataType {
        hana_type_to_arrow(type_id, None, None)
    }

    /// Maps a DECIMAL column with the given precision and scale.
    fn decimal(precision: Option<u8>, scale: Option<i8>) -> DataType {
        hana_type_to_arrow(TypeId::DECIMAL, precision, scale)
    }

    /// Asserts each `(type_id, expected)` pair, mapped without precision/scale.
    fn assert_plain_mappings(cases: impl IntoIterator<Item = (TypeId, DataType)>) {
        for (type_id, expected) in cases {
            assert_eq!(plain(type_id), expected);
        }
    }

    /// Asserts that every listed type reports the `expected` category name.
    fn assert_category(type_ids: impl IntoIterator<Item = TypeId>, expected: &str) {
        for type_id in type_ids {
            assert_eq!(type_category(type_id), expected);
        }
    }

    // ---------------------------------------------------------------------
    // Integer type mappings
    // ---------------------------------------------------------------------

    #[test]
    fn test_integer_mappings() {
        assert_plain_mappings([
            (TypeId::TINYINT, DataType::UInt8),
            (TypeId::SMALLINT, DataType::Int16),
            (TypeId::INT, DataType::Int32),
            (TypeId::BIGINT, DataType::Int64),
        ]);
    }

    #[test]
    fn test_integer_mappings_ignore_precision_scale() {
        let int_with_hints = hana_type_to_arrow(TypeId::INT, Some(10), Some(2));
        assert_eq!(int_with_hints, DataType::Int32);

        let bigint_with_hints = hana_type_to_arrow(TypeId::BIGINT, Some(20), Some(0));
        assert_eq!(bigint_with_hints, DataType::Int64);
    }

    // ---------------------------------------------------------------------
    // Float type mappings
    // ---------------------------------------------------------------------

    #[test]
    fn test_float_mappings() {
        assert_plain_mappings([
            (TypeId::REAL, DataType::Float32),
            (TypeId::DOUBLE, DataType::Float64),
        ]);
    }

    #[test]
    fn test_float_mappings_ignore_precision_scale() {
        let real_with_hint = hana_type_to_arrow(TypeId::REAL, Some(24), None);
        assert_eq!(real_with_hint, DataType::Float32);

        let double_with_hint = hana_type_to_arrow(TypeId::DOUBLE, Some(53), None);
        assert_eq!(double_with_hint, DataType::Float64);
    }

    // ---------------------------------------------------------------------
    // Decimal type mappings
    // ---------------------------------------------------------------------

    #[test]
    fn test_decimal_mapping() {
        assert_eq!(decimal(Some(18), Some(2)), DataType::Decimal128(18, 2));
    }

    #[test]
    fn test_decimal_defaults() {
        assert_eq!(decimal(None, None), DataType::Decimal128(38, 0));
    }

    #[test]
    fn test_decimal_with_only_precision() {
        assert_eq!(decimal(Some(10), None), DataType::Decimal128(10, 0));
    }

    #[test]
    fn test_decimal_with_only_scale() {
        assert_eq!(decimal(None, Some(5)), DataType::Decimal128(38, 5));
    }

    #[test]
    fn test_decimal_max_precision() {
        assert_eq!(decimal(Some(38), Some(10)), DataType::Decimal128(38, 10));
    }

    #[test]
    fn test_decimal_min_precision() {
        assert_eq!(decimal(Some(1), Some(0)), DataType::Decimal128(1, 0));
    }

    #[test]
    fn test_decimal_precision_clamped_to_max() {
        assert_eq!(decimal(Some(50), Some(10)), DataType::Decimal128(38, 10));
    }

    #[test]
    fn test_decimal_zero_scale() {
        assert_eq!(decimal(Some(18), Some(0)), DataType::Decimal128(18, 0));
    }

    // ---------------------------------------------------------------------
    // String type mappings
    // ---------------------------------------------------------------------

    #[test]
    fn test_string_mappings() {
        assert_plain_mappings([
            (TypeId::VARCHAR, DataType::Utf8),
            (TypeId::NVARCHAR, DataType::Utf8),
            (TypeId::CLOB, DataType::LargeUtf8),
        ]);
    }

    #[test]
    fn test_all_string_type_variants() {
        assert_plain_mappings([
            (TypeId::CHAR, DataType::Utf8),
            (TypeId::NCHAR, DataType::Utf8),
            (TypeId::SHORTTEXT, DataType::Utf8),
            (TypeId::ALPHANUM, DataType::Utf8),
            (TypeId::STRING, DataType::Utf8),
        ]);
    }

    #[test]
    fn test_lob_string_types() {
        assert_plain_mappings([
            (TypeId::CLOB, DataType::LargeUtf8),
            (TypeId::NCLOB, DataType::LargeUtf8),
            (TypeId::TEXT, DataType::LargeUtf8),
        ]);
    }

    // ---------------------------------------------------------------------
    // Binary type mappings
    // ---------------------------------------------------------------------

    #[test]
    fn test_binary_mappings() {
        assert_plain_mappings([
            (TypeId::BINARY, DataType::Binary),
            (TypeId::VARBINARY, DataType::Binary),
        ]);
    }

    #[test]
    fn test_blob_mapping() {
        assert_eq!(plain(TypeId::BLOB), DataType::LargeBinary);
    }

    #[test]
    fn test_fixed_size_binary_mappings() {
        assert_plain_mappings([
            (TypeId::FIXED8, DataType::FixedSizeBinary(8)),
            (TypeId::FIXED12, DataType::FixedSizeBinary(12)),
            (TypeId::FIXED16, DataType::FixedSizeBinary(16)),
        ]);
    }

    // ---------------------------------------------------------------------
    // Temporal type mappings
    // ---------------------------------------------------------------------

    #[test]
    fn test_temporal_mappings() {
        assert_plain_mappings([
            (TypeId::DAYDATE, DataType::Date32),
            (TypeId::SECONDTIME, DataType::Time64(TimeUnit::Nanosecond)),
            (
                TypeId::LONGDATE,
                DataType::Timestamp(TimeUnit::Nanosecond, None),
            ),
        ]);
    }

    #[test]
    fn test_seconddate_mapping() {
        let expected = DataType::Timestamp(TimeUnit::Nanosecond, None);
        assert_eq!(plain(TypeId::SECONDDATE), expected);
    }

    // ---------------------------------------------------------------------
    // Boolean type mapping
    // ---------------------------------------------------------------------

    #[test]
    fn test_boolean_mapping() {
        assert_eq!(plain(TypeId::BOOLEAN), DataType::Boolean);
    }

    // ---------------------------------------------------------------------
    // Spatial type mappings
    // ---------------------------------------------------------------------

    #[test]
    fn test_spatial_mappings() {
        assert_plain_mappings([
            (TypeId::GEOMETRY, DataType::Binary),
            (TypeId::POINT, DataType::Binary),
        ]);
    }

    // ---------------------------------------------------------------------
    // Field creation
    // ---------------------------------------------------------------------

    #[test]
    fn test_field_creation() {
        let amount = hana_field_to_arrow("amount", TypeId::DECIMAL, true, Some(18), Some(2));
        assert_eq!(amount.name(), "amount");
        assert!(amount.is_nullable());
        assert_eq!(*amount.data_type(), DataType::Decimal128(18, 2));
    }

    #[test]
    fn test_field_creation_non_nullable() {
        let id = hana_field_to_arrow("id", TypeId::INT, false, None, None);
        assert_eq!(id.name(), "id");
        assert!(!id.is_nullable());
        assert_eq!(*id.data_type(), DataType::Int32);
    }

    #[test]
    fn test_field_creation_string() {
        let name_col = hana_field_to_arrow("name", TypeId::VARCHAR, true, None, None);
        assert_eq!(name_col.name(), "name");
        assert!(name_col.is_nullable());
        assert_eq!(*name_col.data_type(), DataType::Utf8);
    }

    #[test]
    fn test_field_creation_temporal() {
        let created_at = hana_field_to_arrow("created_at", TypeId::LONGDATE, false, None, None);
        assert_eq!(created_at.name(), "created_at");
        assert!(!created_at.is_nullable());

        let expected = DataType::Timestamp(TimeUnit::Nanosecond, None);
        assert_eq!(*created_at.data_type(), expected);
    }

    #[test]
    fn test_field_creation_empty_name() {
        let unnamed = hana_field_to_arrow("", TypeId::INT, false, None, None);
        assert_eq!(unnamed.name(), "");
        assert_eq!(*unnamed.data_type(), DataType::Int32);
    }

    #[test]
    fn test_field_creation_special_characters_in_name() {
        let odd_name = "column-name_123";
        let col = hana_field_to_arrow(odd_name, TypeId::INT, false, None, None);
        assert_eq!(col.name(), "column-name_123");
    }

    #[test]
    fn test_field_creation_unicode_name() {
        let col = hana_field_to_arrow("列名", TypeId::VARCHAR, true, None, None);
        assert_eq!(col.name(), "列名");
        assert_eq!(*col.data_type(), DataType::Utf8);
    }

    // ---------------------------------------------------------------------
    // Type categories
    // ---------------------------------------------------------------------

    #[test]
    fn test_type_category() {
        let cases = [
            (TypeId::INT, "Numeric"),
            (TypeId::DECIMAL, "Decimal"),
            (TypeId::VARCHAR, "String"),
            (TypeId::BLOB, "LOB"),
            (TypeId::DAYDATE, "Temporal"),
        ];
        for (type_id, expected) in cases {
            assert_eq!(type_category(type_id), expected);
        }
    }

    #[test]
    fn test_type_category_all_numeric() {
        assert_category(
            [
                TypeId::TINYINT,
                TypeId::SMALLINT,
                TypeId::INT,
                TypeId::BIGINT,
                TypeId::REAL,
                TypeId::DOUBLE,
            ],
            "Numeric",
        );
    }

    #[test]
    fn test_type_category_all_string() {
        assert_category(
            [
                TypeId::CHAR,
                TypeId::VARCHAR,
                TypeId::NCHAR,
                TypeId::NVARCHAR,
                TypeId::SHORTTEXT,
                TypeId::ALPHANUM,
                TypeId::STRING,
            ],
            "String",
        );
    }

    #[test]
    fn test_type_category_all_binary() {
        assert_category(
            [
                TypeId::BINARY,
                TypeId::VARBINARY,
                TypeId::FIXED8,
                TypeId::FIXED12,
                TypeId::FIXED16,
            ],
            "Binary",
        );
    }

    #[test]
    fn test_type_category_all_lob() {
        assert_category(
            [TypeId::CLOB, TypeId::NCLOB, TypeId::BLOB, TypeId::TEXT],
            "LOB",
        );
    }

    #[test]
    fn test_type_category_all_temporal() {
        assert_category(
            [
                TypeId::DAYDATE,
                TypeId::SECONDTIME,
                TypeId::SECONDDATE,
                TypeId::LONGDATE,
            ],
            "Temporal",
        );
    }

    #[test]
    fn test_type_category_spatial() {
        assert_category([TypeId::GEOMETRY, TypeId::POINT], "Spatial");
    }

    #[test]
    fn test_type_category_boolean() {
        assert_eq!(type_category(TypeId::BOOLEAN), "Boolean");
    }
}