Skip to main content

exarrow_rs/types/
mapping.rs

1//! Type mapping between Exasol and Apache Arrow data types.
2
3use crate::error::ConversionError;
4use arrow::datatypes::{DataType, IntervalUnit, TimeUnit};
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7
/// Strategy for turning source-schema column names into Exasol column identifiers.
///
/// The selected mode decides how each name is rewritten when an Exasol
/// `CREATE TABLE` DDL statement is generated.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ColumnNameMode {
    /// Keep every name verbatim as a double-quoted identifier (the default mode).
    ///
    /// In this mode:
    /// - each name is wrapped in double quotes
    /// - double quotes inside a name are escaped by doubling them
    /// - case sensitivity and special characters are preserved
    ///
    /// Example: `my Column` becomes `"my Column"`
    #[default]
    Quoted,

    /// Rewrite every name into a valid Exasol identifier.
    ///
    /// In this mode:
    /// - names are converted to uppercase
    /// - characters that are invalid in an identifier are replaced with underscores
    /// - names that start with a digit get an underscore prefix
    ///
    /// Example: `my Column` becomes `MY_COLUMN`
    Sanitize,
}
35
36/// Exasol data type representation.
37#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
38#[serde(tag = "type", rename_all = "UPPERCASE")]
39pub enum ExasolType {
40    /// BOOLEAN type
41    #[serde(rename = "BOOLEAN")]
42    Boolean,
43
44    /// CHAR(n) type
45    #[serde(rename = "CHAR")]
46    Char { size: usize },
47
48    /// VARCHAR(n) type
49    #[serde(rename = "VARCHAR")]
50    Varchar { size: usize },
51
52    /// DECIMAL(p, s) type
53    #[serde(rename = "DECIMAL")]
54    Decimal { precision: u8, scale: i8 },
55
56    /// DOUBLE PRECISION type
57    #[serde(rename = "DOUBLE")]
58    Double,
59
60    /// DATE type
61    #[serde(rename = "DATE")]
62    Date,
63
64    /// TIMESTAMP type
65    #[serde(rename = "TIMESTAMP")]
66    Timestamp { with_local_time_zone: bool },
67
68    /// INTERVAL YEAR TO MONTH
69    #[serde(rename = "INTERVAL YEAR TO MONTH")]
70    IntervalYearToMonth,
71
72    /// INTERVAL DAY TO SECOND
73    #[serde(rename = "INTERVAL DAY TO SECOND")]
74    IntervalDayToSecond { precision: u8 },
75
76    /// GEOMETRY type
77    #[serde(rename = "GEOMETRY")]
78    Geometry { srid: Option<i32> },
79
80    /// HASHTYPE type (for hash values)
81    #[serde(rename = "HASHTYPE")]
82    Hashtype { byte_size: usize },
83}
84
85impl ExasolType {
86    /// Convert this Exasol type to a DDL type string suitable for CREATE TABLE statements.
87    ///
88    /// # Returns
89    ///
90    /// A string representing the Exasol DDL type, e.g., "VARCHAR(100)", "DECIMAL(18,2)".
91    #[must_use]
92    pub fn to_ddl_type(&self) -> String {
93        match self {
94            ExasolType::Boolean => "BOOLEAN".to_string(),
95            ExasolType::Char { size } => format!("CHAR({size})"),
96            ExasolType::Varchar { size } => format!("VARCHAR({size})"),
97            ExasolType::Decimal { precision, scale } => format!("DECIMAL({precision},{scale})"),
98            ExasolType::Double => "DOUBLE".to_string(),
99            ExasolType::Date => "DATE".to_string(),
100            ExasolType::Timestamp {
101                with_local_time_zone,
102            } => {
103                if *with_local_time_zone {
104                    "TIMESTAMP WITH LOCAL TIME ZONE".to_string()
105                } else {
106                    "TIMESTAMP".to_string()
107                }
108            }
109            ExasolType::IntervalYearToMonth => "INTERVAL YEAR TO MONTH".to_string(),
110            ExasolType::IntervalDayToSecond { precision } => {
111                format!("INTERVAL DAY TO SECOND({precision})")
112            }
113            ExasolType::Geometry { srid } => {
114                if let Some(srid) = srid {
115                    format!("GEOMETRY({srid})")
116                } else {
117                    "GEOMETRY".to_string()
118                }
119            }
120            ExasolType::Hashtype { byte_size } => format!("HASHTYPE({} BYTE)", byte_size),
121        }
122    }
123}
124
125/// Type mapper for converting between Exasol and Arrow types.
126pub struct TypeMapper;
127
128impl TypeMapper {
129    /// Convert an Exasol type to an Arrow DataType.
130    ///
131    /// # Arguments
132    /// * `exasol_type` - The Exasol type to convert
133    /// * `nullable` - Whether the field is nullable
134    ///
135    /// # Returns
136    /// The corresponding Arrow DataType
137    ///
138    /// # Errors
139    /// Returns `ConversionError::UnsupportedType` if the type cannot be mapped
140    pub fn exasol_to_arrow(
141        exasol_type: &ExasolType,
142        nullable: bool,
143    ) -> Result<DataType, ConversionError> {
144        let _ = nullable; // Arrow nullability is handled at the Field level
145
146        match exasol_type {
147            ExasolType::Boolean => Ok(DataType::Boolean),
148
149            ExasolType::Char { .. } | ExasolType::Varchar { .. } => Ok(DataType::Utf8),
150
151            // Exasol DECIMAL precision is limited to 1-36 digits (per Exasol documentation).
152            // Arrow Decimal128 supports up to 38 digits, so all Exasol decimals fit.
153            // See: https://docs.exasol.com/db/latest/sql_references/data_types/data_type_size.htm
154            ExasolType::Decimal { precision, scale } => {
155                Ok(DataType::Decimal128(*precision, *scale))
156            }
157
158            ExasolType::Double => Ok(DataType::Float64),
159
160            ExasolType::Date => Ok(DataType::Date32),
161
162            ExasolType::Timestamp {
163                with_local_time_zone,
164            } => {
165                if *with_local_time_zone {
166                    // Timestamp with local timezone -> Timestamp with UTC timezone
167                    Ok(DataType::Timestamp(
168                        TimeUnit::Microsecond,
169                        Some("UTC".into()),
170                    ))
171                } else {
172                    // Timestamp without timezone
173                    Ok(DataType::Timestamp(TimeUnit::Microsecond, None))
174                }
175            }
176
177            ExasolType::IntervalYearToMonth => {
178                // Map to MonthDayNano interval (only using month component)
179                Ok(DataType::Interval(IntervalUnit::MonthDayNano))
180            }
181
182            ExasolType::IntervalDayToSecond { .. } => {
183                // Map to MonthDayNano interval (using day and nanosecond components)
184                Ok(DataType::Interval(IntervalUnit::MonthDayNano))
185            }
186
187            ExasolType::Geometry { .. } => {
188                // Geometry as binary (WKB - Well-Known Binary)
189                Ok(DataType::Binary)
190            }
191
192            ExasolType::Hashtype { .. } => {
193                // Hash values as fixed-size binary
194                Ok(DataType::Binary)
195            }
196        }
197    }
198
199    /// Convert an Arrow DataType to an Exasol type.
200    ///
201    /// This is used for parameter binding and type inference.
202    ///
203    /// # Arguments
204    /// * `arrow_type` - The Arrow type to convert
205    ///
206    /// # Returns
207    /// The corresponding Exasol type
208    ///
209    /// # Errors
210    /// Returns `ConversionError::UnsupportedType` if the type cannot be mapped
211    pub fn arrow_to_exasol(arrow_type: &DataType) -> Result<ExasolType, ConversionError> {
212        match arrow_type {
213            DataType::Boolean => Ok(ExasolType::Boolean),
214
215            DataType::Utf8 | DataType::LargeUtf8 => {
216                // Default to VARCHAR(2000000) for string types
217                Ok(ExasolType::Varchar { size: 2000000 })
218            }
219
220            DataType::Int8 | DataType::Int16 | DataType::Int32 => Ok(ExasolType::Decimal {
221                precision: 18,
222                scale: 0,
223            }),
224
225            DataType::Int64 => Ok(ExasolType::Decimal {
226                precision: 36,
227                scale: 0,
228            }),
229
230            DataType::UInt8 | DataType::UInt16 | DataType::UInt32 => Ok(ExasolType::Decimal {
231                precision: 18,
232                scale: 0,
233            }),
234
235            DataType::UInt64 => Ok(ExasolType::Decimal {
236                precision: 36,
237                scale: 0,
238            }),
239
240            DataType::Float32 | DataType::Float64 => Ok(ExasolType::Double),
241
242            DataType::Decimal128(precision, scale) | DataType::Decimal256(precision, scale) => {
243                Ok(ExasolType::Decimal {
244                    precision: *precision,
245                    scale: *scale,
246                })
247            }
248
249            DataType::Date32 | DataType::Date64 => Ok(ExasolType::Date),
250
251            DataType::Timestamp(_, tz) => Ok(ExasolType::Timestamp {
252                with_local_time_zone: tz.is_some(),
253            }),
254
255            DataType::Interval(_) => Ok(ExasolType::IntervalDayToSecond { precision: 3 }),
256
257            DataType::Binary | DataType::LargeBinary => {
258                Ok(ExasolType::Varchar { size: 2000000 }) // Store as hex string
259            }
260
261            _ => Err(ConversionError::UnsupportedType {
262                exasol_type: format!("Arrow type {:?}", arrow_type),
263            }),
264        }
265    }
266
267    /// Create Arrow field metadata to preserve Exasol type information.
268    ///
269    /// This allows round-tripping type information.
270    pub fn create_field_metadata(exasol_type: &ExasolType) -> HashMap<String, String> {
271        let mut metadata = HashMap::new();
272
273        metadata.insert(
274            "exasol:type".to_string(),
275            serde_json::to_string(exasol_type).unwrap_or_default(),
276        );
277
278        match exasol_type {
279            ExasolType::Char { size } => {
280                metadata.insert("exasol:size".to_string(), size.to_string());
281            }
282            ExasolType::Varchar { size } => {
283                metadata.insert("exasol:size".to_string(), size.to_string());
284            }
285            ExasolType::Decimal { precision, scale } => {
286                metadata.insert("exasol:precision".to_string(), precision.to_string());
287                metadata.insert("exasol:scale".to_string(), scale.to_string());
288            }
289            ExasolType::Geometry { srid: Some(srid) } => {
290                metadata.insert("exasol:srid".to_string(), srid.to_string());
291            }
292            ExasolType::Geometry { srid: None } => {}
293            _ => {}
294        }
295
296        metadata
297    }
298
299    /// Extract Exasol type from Arrow field metadata.
300    pub fn from_field_metadata(metadata: &HashMap<String, String>) -> Option<ExasolType> {
301        metadata
302            .get("exasol:type")
303            .and_then(|s| serde_json::from_str(s).ok())
304    }
305}
306
#[cfg(test)]
mod tests {
    use super::*;

    /// Exasol -> Arrow conversion for a nullable field, panicking on error.
    fn to_arrow(exasol_type: &ExasolType) -> DataType {
        TypeMapper::exasol_to_arrow(exasol_type, true).unwrap()
    }

    /// Arrow -> Exasol conversion, panicking on error.
    fn to_exasol(arrow_type: &DataType) -> ExasolType {
        TypeMapper::arrow_to_exasol(arrow_type).unwrap()
    }

    #[test]
    fn test_boolean_mapping() {
        let arrow = to_arrow(&ExasolType::Boolean);
        assert_eq!(arrow, DataType::Boolean);

        // And back again.
        assert_eq!(to_exasol(&arrow), ExasolType::Boolean);
    }

    #[test]
    fn test_string_mapping() {
        let varchar = ExasolType::Varchar { size: 100 };
        assert_eq!(to_arrow(&varchar), DataType::Utf8);
    }

    #[test]
    fn test_decimal_mapping() {
        let decimal = ExasolType::Decimal {
            precision: 18,
            scale: 2,
        };
        let arrow = to_arrow(&decimal);
        assert_eq!(arrow, DataType::Decimal128(18, 2));

        // Precision and scale survive the round trip.
        assert_eq!(to_exasol(&arrow), decimal);
    }

    #[test]
    fn test_arrow_decimal256_to_exasol() {
        // Arrow Decimal256 types (from external sources) must convert to an Exasol decimal.
        // Note: Exasol itself only produces precision <= 36, but Arrow sources may use Decimal256.
        let expected = ExasolType::Decimal {
            precision: 40,
            scale: 5,
        };
        assert_eq!(to_exasol(&DataType::Decimal256(40, 5)), expected);
    }

    #[test]
    fn test_timestamp_mapping() {
        let plain = ExasolType::Timestamp {
            with_local_time_zone: false,
        };
        assert_eq!(
            to_arrow(&plain),
            DataType::Timestamp(TimeUnit::Microsecond, None)
        );
    }

    #[test]
    fn test_timestamp_with_tz_mapping() {
        let zoned = ExasolType::Timestamp {
            with_local_time_zone: true,
        };
        // Only the presence of a timezone is checked, not its value.
        let arrow = to_arrow(&zoned);
        assert!(matches!(
            arrow,
            DataType::Timestamp(TimeUnit::Microsecond, Some(_))
        ));
    }

    #[test]
    fn test_date_mapping() {
        assert_eq!(to_arrow(&ExasolType::Date), DataType::Date32);
    }

    #[test]
    fn test_metadata_preservation() {
        let decimal = ExasolType::Decimal {
            precision: 18,
            scale: 2,
        };

        let metadata = TypeMapper::create_field_metadata(&decimal);
        assert!(metadata.contains_key("exasol:type"));
        assert_eq!(metadata.get("exasol:precision"), Some(&"18".to_string()));
        assert_eq!(metadata.get("exasol:scale"), Some(&"2".to_string()));

        // The JSON entry alone is enough to rebuild the type.
        let restored = TypeMapper::from_field_metadata(&metadata).unwrap();
        assert_eq!(restored, decimal);
    }

    #[test]
    fn test_geometry_mapping() {
        let geometry = ExasolType::Geometry { srid: Some(4326) };
        assert_eq!(to_arrow(&geometry), DataType::Binary);
    }

    #[test]
    fn test_interval_mapping() {
        let interval = ExasolType::IntervalDayToSecond { precision: 3 };
        assert_eq!(
            to_arrow(&interval),
            DataType::Interval(IntervalUnit::MonthDayNano)
        );
    }

    #[test]
    fn test_uint_to_exasol_mapping() {
        // UInt8, UInt16, UInt32 -> DECIMAL(18,0)
        let narrow = ExasolType::Decimal {
            precision: 18,
            scale: 0,
        };
        for arrow in [DataType::UInt8, DataType::UInt16, DataType::UInt32] {
            assert_eq!(to_exasol(&arrow), narrow);
        }

        // UInt64 -> DECIMAL(36,0)
        let wide = ExasolType::Decimal {
            precision: 36,
            scale: 0,
        };
        assert_eq!(to_exasol(&DataType::UInt64), wide);
    }

    #[test]
    fn test_to_ddl_type_boolean() {
        assert_eq!(ExasolType::Boolean.to_ddl_type(), "BOOLEAN");
    }

    #[test]
    fn test_to_ddl_type_char() {
        assert_eq!(ExasolType::Char { size: 50 }.to_ddl_type(), "CHAR(50)");
    }

    #[test]
    fn test_to_ddl_type_varchar() {
        let varchar = ExasolType::Varchar { size: 2000000 };
        assert_eq!(varchar.to_ddl_type(), "VARCHAR(2000000)");
    }

    #[test]
    fn test_to_ddl_type_decimal() {
        let decimal = ExasolType::Decimal {
            precision: 18,
            scale: 2,
        };
        assert_eq!(decimal.to_ddl_type(), "DECIMAL(18,2)");
    }

    #[test]
    fn test_to_ddl_type_double() {
        assert_eq!(ExasolType::Double.to_ddl_type(), "DOUBLE");
    }

    #[test]
    fn test_to_ddl_type_date() {
        assert_eq!(ExasolType::Date.to_ddl_type(), "DATE");
    }

    #[test]
    fn test_to_ddl_type_timestamp() {
        let plain = ExasolType::Timestamp {
            with_local_time_zone: false,
        };
        assert_eq!(plain.to_ddl_type(), "TIMESTAMP");

        let zoned = ExasolType::Timestamp {
            with_local_time_zone: true,
        };
        assert_eq!(zoned.to_ddl_type(), "TIMESTAMP WITH LOCAL TIME ZONE");
    }

    #[test]
    fn test_to_ddl_type_interval() {
        assert_eq!(
            ExasolType::IntervalYearToMonth.to_ddl_type(),
            "INTERVAL YEAR TO MONTH"
        );

        let day_to_second = ExasolType::IntervalDayToSecond { precision: 6 };
        assert_eq!(day_to_second.to_ddl_type(), "INTERVAL DAY TO SECOND(6)");
    }

    #[test]
    fn test_to_ddl_type_geometry() {
        let without_srid = ExasolType::Geometry { srid: None };
        assert_eq!(without_srid.to_ddl_type(), "GEOMETRY");

        let with_srid = ExasolType::Geometry { srid: Some(4326) };
        assert_eq!(with_srid.to_ddl_type(), "GEOMETRY(4326)");
    }

    #[test]
    fn test_to_ddl_type_hashtype() {
        let hash = ExasolType::Hashtype { byte_size: 16 };
        assert_eq!(hash.to_ddl_type(), "HASHTYPE(16 BYTE)");
    }

    #[test]
    fn test_column_name_mode_default() {
        assert_eq!(ColumnNameMode::default(), ColumnNameMode::Quoted);
    }
}