Skip to main content

hdbconnect_arrow/types/
arrow.rs

1//! Arrow type mappings from HANA types.
2//!
3//! This module provides the authoritative mapping between HANA SQL types
4//! and Apache Arrow types.
5//!
6//! # Type Mapping Table
7//!
//! | HANA Type | Arrow Type | Notes |
//! |-----------|------------|-------|
//! | TINYINT | UInt8 | Unsigned in HANA |
//! | SMALLINT | Int16 | |
//! | INT | Int32 | |
//! | BIGINT | Int64 | |
//! | REAL | Float32 | |
//! | DOUBLE | Float64 | |
//! | DECIMAL(p,s) | Decimal128(p,s) | Precision capped at 38; defaults to (38, 0) when unknown |
//! | CHAR/VARCHAR | Utf8 | |
//! | NCHAR/NVARCHAR | Utf8 | Unicode strings |
//! | SHORTTEXT/ALPHANUM/STRING | Utf8 | |
//! | CLOB/NCLOB/TEXT | LargeUtf8 | Large strings |
//! | BINARY/VARBINARY | Binary | |
//! | BLOB | LargeBinary | Large binary |
//! | FIXED8/FIXED12/FIXED16 | FixedSizeBinary(8/12/16) | |
//! | DAYDATE | Date32 | Days since epoch |
//! | SECONDTIME | Time64(Nanosecond) | |
//! | LONGDATE/SECONDDATE | Timestamp(Nanosecond, None) | |
//! | BOOLEAN | Boolean | |
//! | GEOMETRY/POINT | Binary | WKB format |
//! | anything else | Utf8 | Fallback to a string representation |
26
27use std::collections::HashMap;
28
29use arrow_schema::{DataType, Field, TimeUnit};
30use hdbconnect::TypeId;
31
32/// Convert HANA `TypeId` to Arrow `DataType`.
33///
34/// This is the authoritative mapping between HANA SQL types and Arrow types.
35/// The mapping prioritizes:
36/// 1. Precision preservation (especially for decimals)
37/// 2. Zero-copy compatibility with Polars/pandas
38/// 3. Consistent handling of nullable values
39///
40/// # Arguments
41///
42/// * `type_id` - The HANA type identifier
43/// * `precision` - Optional precision for DECIMAL types
44/// * `scale` - Optional scale for DECIMAL types
45///
46/// # Returns
47///
48/// The corresponding Arrow `DataType`.
49#[must_use]
50#[allow(clippy::match_same_arms)] // Intentional: semantic separation of GEOMETRY vs BINARY
51pub fn hana_type_to_arrow(type_id: TypeId, precision: Option<u8>, scale: Option<i8>) -> DataType {
52    match type_id {
53        // Integer types
54        TypeId::TINYINT => DataType::UInt8, // HANA TINYINT is unsigned
55        TypeId::SMALLINT => DataType::Int16,
56        TypeId::INT => DataType::Int32,
57        TypeId::BIGINT => DataType::Int64,
58
59        // Floating point types
60        TypeId::REAL => DataType::Float32,
61        TypeId::DOUBLE => DataType::Float64,
62
63        // Decimal types - preserve precision and scale
64        // Note: SMALLDECIMAL is mapped to DECIMAL in hdbconnect 0.32+
65        TypeId::DECIMAL => {
66            let p = precision.unwrap_or(38).min(38);
67            let s = scale.unwrap_or(0);
68            DataType::Decimal128(p, s)
69        }
70
71        // String types - all map to UTF-8
72        TypeId::CHAR
73        | TypeId::VARCHAR
74        | TypeId::NCHAR
75        | TypeId::NVARCHAR
76        | TypeId::SHORTTEXT
77        | TypeId::ALPHANUM
78        | TypeId::STRING => DataType::Utf8,
79
80        // Binary types
81        TypeId::BINARY | TypeId::VARBINARY => DataType::Binary,
82
83        // LOB types - use Large variants for potentially huge data
84        TypeId::CLOB | TypeId::NCLOB | TypeId::TEXT => DataType::LargeUtf8,
85        TypeId::BLOB => DataType::LargeBinary,
86
87        // Temporal types
88        // Note: DATE/TIME/TIMESTAMP are deprecated in hdbconnect 0.32+
89        // Using DAYDATE, SECONDTIME, LONGDATE, SECONDDATE instead
90        TypeId::DAYDATE => DataType::Date32,
91        TypeId::SECONDTIME => DataType::Time64(TimeUnit::Nanosecond),
92        TypeId::SECONDDATE | TypeId::LONGDATE => DataType::Timestamp(TimeUnit::Nanosecond, None),
93
94        // Boolean
95        TypeId::BOOLEAN => DataType::Boolean,
96
97        // Fixed-size binary types (HANA specific)
98        TypeId::FIXED8 => DataType::FixedSizeBinary(8),
99        TypeId::FIXED12 => DataType::FixedSizeBinary(12),
100        TypeId::FIXED16 => DataType::FixedSizeBinary(16),
101
102        // Spatial types - serialize as WKB binary
103        TypeId::GEOMETRY | TypeId::POINT => DataType::Binary,
104
105        // Unknown/unsupported - fallback to string representation
106        _ => DataType::Utf8,
107    }
108}
109
110/// Create an Arrow Field from HANA column metadata.
111///
112/// # Arguments
113///
114/// * `name` - Column name
115/// * `type_id` - HANA type identifier
116/// * `nullable` - Whether the column allows NULL values
117/// * `precision` - Optional precision for DECIMAL types
118/// * `scale` - Optional scale for DECIMAL types
119#[must_use]
120pub fn hana_field_to_arrow(
121    name: &str,
122    type_id: TypeId,
123    nullable: bool,
124    precision: Option<u8>,
125    scale: Option<i8>,
126) -> Field {
127    Field::new(
128        name,
129        hana_type_to_arrow(type_id, precision, scale),
130        nullable,
131    )
132}
133
134/// Extension trait for hdbconnect `FieldMetadata`.
135///
136/// Provides convenient conversion methods for HANA metadata to Arrow types.
137pub trait FieldMetadataExt {
138    /// Convert to Arrow Field.
139    fn to_arrow_field(&self) -> Field;
140
141    /// Get the Arrow `DataType` for this field.
142    fn arrow_data_type(&self) -> DataType;
143}
144
/// Arrow conversions for `hdbconnect_async` `FieldMetadata`.
///
/// Adds helper methods that translate async HANA column metadata into the
/// matching Arrow schema types. Only available with the `async` feature.
#[cfg(feature = "async")]
pub trait FieldMetadataExtAsync {
    /// Return the Arrow `DataType` that represents this column.
    fn arrow_data_type(&self) -> DataType;

    /// Build the Arrow `Field` for this column.
    fn to_arrow_field(&self) -> Field;
}
156
/// Narrow an `i16` precision reported by the driver to the `u8` expected by
/// `hana_type_to_arrow`.
///
/// Values outside `0..=255` yield `None`, so the type mapping applies its own
/// default instead of a truncated number.
fn narrow_precision(precision: i16) -> Option<u8> {
    u8::try_from(precision).ok()
}

/// Narrow an `i16` scale reported by the driver to the `i8` expected by
/// `hana_type_to_arrow`.
///
/// Only scales in `0..=127` are passed through; negative or oversized values
/// yield `None`, so the type mapping applies its own default.
fn narrow_scale(scale: i16) -> Option<i8> {
    i8::try_from(scale).ok().filter(|value| !value.is_negative())
}

/// Internal macro to implement `FieldMetadataExt` for different `FieldMetadata` types.
///
/// Both `hdbconnect::FieldMetadata` and `hdbconnect_async::FieldMetadata` have
/// identical interfaces, so we use a macro to avoid code duplication.
///
/// The generated methods share `narrow_precision` / `narrow_scale`, so the
/// `Field` and the bare `DataType` are always derived from the same
/// precision and scale.
macro_rules! impl_field_metadata_ext {
    ($trait_name:ident for $type:ty) => {
        impl $trait_name for $type {
            fn to_arrow_field(&self) -> Field {
                // Prefer the display name; fall back to the column name when
                // the display name is empty.
                let display = self.displayname();
                let name = if display.is_empty() {
                    self.columnname()
                } else {
                    display
                };

                let type_id = self.type_id();
                let precision = self.precision();

                // For CHAR/NCHAR/VARCHAR/NVARCHAR, precision contains the max
                // length; it is exposed as field metadata for capacity hints.
                let has_max_length = matches!(
                    type_id,
                    TypeId::VARCHAR | TypeId::NVARCHAR | TypeId::CHAR | TypeId::NCHAR
                );

                let field = hana_field_to_arrow(
                    name,
                    type_id,
                    self.is_nullable(),
                    narrow_precision(precision),
                    narrow_scale(self.scale()),
                );

                if has_max_length {
                    // The raw (un-narrowed) precision is recorded on purpose.
                    field.with_metadata(HashMap::from([(
                        "max_length".to_string(),
                        precision.to_string(),
                    )]))
                } else {
                    field
                }
            }

            fn arrow_data_type(&self) -> DataType {
                hana_type_to_arrow(
                    self.type_id(),
                    narrow_precision(self.precision()),
                    narrow_scale(self.scale()),
                )
            }
        }
    };
}
225
// Implement `FieldMetadataExt` for the synchronous `hdbconnect::FieldMetadata`.
impl_field_metadata_ext!(FieldMetadataExt for hdbconnect::FieldMetadata);

// Implement `FieldMetadataExtAsync` for `hdbconnect_async::FieldMetadata`;
// compiled only when the `async` feature is enabled.
#[cfg(feature = "async")]
impl_field_metadata_ext!(FieldMetadataExtAsync for hdbconnect_async::FieldMetadata);
232
233/// Get the HANA type category for a `TypeId`.
234///
235/// Returns the category name as a static string.
236#[must_use]
237pub const fn type_category(type_id: TypeId) -> &'static str {
238    super::conversion::TypeCategory::from_type_id(type_id).as_str()
239}
240
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks every `(type_id, expected)` pair against `hana_type_to_arrow`
    /// called without precision or scale.
    fn assert_arrow_types(cases: impl IntoIterator<Item = (TypeId, DataType)>) {
        for (type_id, expected) in cases {
            assert_eq!(hana_type_to_arrow(type_id, None, None), expected);
        }
    }

    /// Checks that every listed type reports the `expected` category name.
    fn assert_category(expected: &str, type_ids: impl IntoIterator<Item = TypeId>) {
        for type_id in type_ids {
            assert_eq!(type_category(type_id), expected);
        }
    }

    /// Maps a DECIMAL column with the given precision and scale.
    fn decimal(precision: Option<u8>, scale: Option<i8>) -> DataType {
        hana_type_to_arrow(TypeId::DECIMAL, precision, scale)
    }

    // ---------------------------------------------------------------------
    // Integer types
    // ---------------------------------------------------------------------

    #[test]
    fn test_integer_mappings() {
        assert_arrow_types([
            (TypeId::TINYINT, DataType::UInt8),
            (TypeId::SMALLINT, DataType::Int16),
            (TypeId::INT, DataType::Int32),
            (TypeId::BIGINT, DataType::Int64),
        ]);
    }

    #[test]
    fn test_integer_mappings_ignore_precision_scale() {
        let int = hana_type_to_arrow(TypeId::INT, Some(10), Some(2));
        assert_eq!(int, DataType::Int32);

        let bigint = hana_type_to_arrow(TypeId::BIGINT, Some(20), Some(0));
        assert_eq!(bigint, DataType::Int64);
    }

    // ---------------------------------------------------------------------
    // Floating point types
    // ---------------------------------------------------------------------

    #[test]
    fn test_float_mappings() {
        assert_arrow_types([
            (TypeId::REAL, DataType::Float32),
            (TypeId::DOUBLE, DataType::Float64),
        ]);
    }

    #[test]
    fn test_float_mappings_ignore_precision_scale() {
        let real = hana_type_to_arrow(TypeId::REAL, Some(24), None);
        assert_eq!(real, DataType::Float32);

        let double = hana_type_to_arrow(TypeId::DOUBLE, Some(53), None);
        assert_eq!(double, DataType::Float64);
    }

    // ---------------------------------------------------------------------
    // Decimal types
    // ---------------------------------------------------------------------

    #[test]
    fn test_decimal_mapping() {
        assert_eq!(decimal(Some(18), Some(2)), DataType::Decimal128(18, 2));
    }

    #[test]
    fn test_decimal_defaults() {
        assert_eq!(decimal(None, None), DataType::Decimal128(38, 0));
    }

    #[test]
    fn test_decimal_with_only_precision() {
        assert_eq!(decimal(Some(10), None), DataType::Decimal128(10, 0));
    }

    #[test]
    fn test_decimal_with_only_scale() {
        assert_eq!(decimal(None, Some(5)), DataType::Decimal128(38, 5));
    }

    #[test]
    fn test_decimal_max_precision() {
        assert_eq!(decimal(Some(38), Some(10)), DataType::Decimal128(38, 10));
    }

    #[test]
    fn test_decimal_min_precision() {
        assert_eq!(decimal(Some(1), Some(0)), DataType::Decimal128(1, 0));
    }

    #[test]
    fn test_decimal_precision_clamped_to_max() {
        assert_eq!(decimal(Some(50), Some(10)), DataType::Decimal128(38, 10));
    }

    #[test]
    fn test_decimal_zero_scale() {
        assert_eq!(decimal(Some(18), Some(0)), DataType::Decimal128(18, 0));
    }

    // ---------------------------------------------------------------------
    // String types
    // ---------------------------------------------------------------------

    #[test]
    fn test_string_mappings() {
        assert_arrow_types([
            (TypeId::VARCHAR, DataType::Utf8),
            (TypeId::NVARCHAR, DataType::Utf8),
            (TypeId::CLOB, DataType::LargeUtf8),
        ]);
    }

    #[test]
    fn test_all_string_type_variants() {
        assert_arrow_types([
            (TypeId::CHAR, DataType::Utf8),
            (TypeId::NCHAR, DataType::Utf8),
            (TypeId::SHORTTEXT, DataType::Utf8),
            (TypeId::ALPHANUM, DataType::Utf8),
            (TypeId::STRING, DataType::Utf8),
        ]);
    }

    #[test]
    fn test_lob_string_types() {
        assert_arrow_types([
            (TypeId::CLOB, DataType::LargeUtf8),
            (TypeId::NCLOB, DataType::LargeUtf8),
            (TypeId::TEXT, DataType::LargeUtf8),
        ]);
    }

    // ---------------------------------------------------------------------
    // Binary types
    // ---------------------------------------------------------------------

    #[test]
    fn test_binary_mappings() {
        assert_arrow_types([
            (TypeId::BINARY, DataType::Binary),
            (TypeId::VARBINARY, DataType::Binary),
        ]);
    }

    #[test]
    fn test_blob_mapping() {
        assert_arrow_types([(TypeId::BLOB, DataType::LargeBinary)]);
    }

    #[test]
    fn test_fixed_size_binary_mappings() {
        assert_arrow_types([
            (TypeId::FIXED8, DataType::FixedSizeBinary(8)),
            (TypeId::FIXED12, DataType::FixedSizeBinary(12)),
            (TypeId::FIXED16, DataType::FixedSizeBinary(16)),
        ]);
    }

    // ---------------------------------------------------------------------
    // Temporal types
    // ---------------------------------------------------------------------

    #[test]
    fn test_temporal_mappings() {
        assert_arrow_types([
            (TypeId::DAYDATE, DataType::Date32),
            (TypeId::SECONDTIME, DataType::Time64(TimeUnit::Nanosecond)),
            (
                TypeId::LONGDATE,
                DataType::Timestamp(TimeUnit::Nanosecond, None),
            ),
        ]);
    }

    #[test]
    fn test_seconddate_mapping() {
        assert_arrow_types([(
            TypeId::SECONDDATE,
            DataType::Timestamp(TimeUnit::Nanosecond, None),
        )]);
    }

    // ---------------------------------------------------------------------
    // Boolean type
    // ---------------------------------------------------------------------

    #[test]
    fn test_boolean_mapping() {
        assert_arrow_types([(TypeId::BOOLEAN, DataType::Boolean)]);
    }

    // ---------------------------------------------------------------------
    // Spatial types
    // ---------------------------------------------------------------------

    #[test]
    fn test_spatial_mappings() {
        assert_arrow_types([
            (TypeId::GEOMETRY, DataType::Binary),
            (TypeId::POINT, DataType::Binary),
        ]);
    }

    // ---------------------------------------------------------------------
    // Field creation
    // ---------------------------------------------------------------------

    #[test]
    fn test_field_creation() {
        let amount = hana_field_to_arrow("amount", TypeId::DECIMAL, true, Some(18), Some(2));
        assert_eq!(amount.name(), "amount");
        assert!(amount.is_nullable());
        assert_eq!(*amount.data_type(), DataType::Decimal128(18, 2));
    }

    #[test]
    fn test_field_creation_non_nullable() {
        let id = hana_field_to_arrow("id", TypeId::INT, false, None, None);
        assert_eq!(id.name(), "id");
        assert!(!id.is_nullable());
        assert_eq!(*id.data_type(), DataType::Int32);
    }

    #[test]
    fn test_field_creation_string() {
        let name = hana_field_to_arrow("name", TypeId::VARCHAR, true, None, None);
        assert_eq!(name.name(), "name");
        assert!(name.is_nullable());
        assert_eq!(*name.data_type(), DataType::Utf8);
    }

    #[test]
    fn test_field_creation_temporal() {
        let created_at = hana_field_to_arrow("created_at", TypeId::LONGDATE, false, None, None);
        assert_eq!(created_at.name(), "created_at");
        assert!(!created_at.is_nullable());
        assert_eq!(
            *created_at.data_type(),
            DataType::Timestamp(TimeUnit::Nanosecond, None)
        );
    }

    #[test]
    fn test_field_creation_empty_name() {
        let unnamed = hana_field_to_arrow("", TypeId::INT, false, None, None);
        assert_eq!(unnamed.name(), "");
        assert_eq!(*unnamed.data_type(), DataType::Int32);
    }

    #[test]
    fn test_field_creation_special_characters_in_name() {
        let column = hana_field_to_arrow("column-name_123", TypeId::INT, false, None, None);
        assert_eq!(column.name(), "column-name_123");
    }

    #[test]
    fn test_field_creation_unicode_name() {
        let column = hana_field_to_arrow("列名", TypeId::VARCHAR, true, None, None);
        assert_eq!(column.name(), "列名");
        assert_eq!(*column.data_type(), DataType::Utf8);
    }

    // ---------------------------------------------------------------------
    // Type categories
    // ---------------------------------------------------------------------

    #[test]
    fn test_type_category() {
        let cases = [
            (TypeId::INT, "Numeric"),
            (TypeId::DECIMAL, "Decimal"),
            (TypeId::VARCHAR, "String"),
            (TypeId::BLOB, "LOB"),
            (TypeId::DAYDATE, "Temporal"),
        ];
        for (type_id, expected) in cases {
            assert_eq!(type_category(type_id), expected);
        }
    }

    #[test]
    fn test_type_category_all_numeric() {
        assert_category(
            "Numeric",
            [
                TypeId::TINYINT,
                TypeId::SMALLINT,
                TypeId::INT,
                TypeId::BIGINT,
                TypeId::REAL,
                TypeId::DOUBLE,
            ],
        );
    }

    #[test]
    fn test_type_category_all_string() {
        assert_category(
            "String",
            [
                TypeId::CHAR,
                TypeId::VARCHAR,
                TypeId::NCHAR,
                TypeId::NVARCHAR,
                TypeId::SHORTTEXT,
                TypeId::ALPHANUM,
                TypeId::STRING,
            ],
        );
    }

    #[test]
    fn test_type_category_all_binary() {
        assert_category(
            "Binary",
            [
                TypeId::BINARY,
                TypeId::VARBINARY,
                TypeId::FIXED8,
                TypeId::FIXED12,
                TypeId::FIXED16,
            ],
        );
    }

    #[test]
    fn test_type_category_all_lob() {
        assert_category(
            "LOB",
            [TypeId::CLOB, TypeId::NCLOB, TypeId::BLOB, TypeId::TEXT],
        );
    }

    #[test]
    fn test_type_category_all_temporal() {
        assert_category(
            "Temporal",
            [
                TypeId::DAYDATE,
                TypeId::SECONDTIME,
                TypeId::SECONDDATE,
                TypeId::LONGDATE,
            ],
        );
    }

    #[test]
    fn test_type_category_spatial() {
        assert_category("Spatial", [TypeId::GEOMETRY, TypeId::POINT]);
    }

    #[test]
    fn test_type_category_boolean() {
        assert_category("Boolean", [TypeId::BOOLEAN]);
    }
}