databend_driver_core/
schema.rs

1// Copyright 2021 Datafuse Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use databend_client::SchemaField as APISchemaField;
18
19use crate::error::{Error, Result};
20
21#[cfg(feature = "flight-sql")]
22use arrow_schema::{DataType as ArrowDataType, Field as ArrowField, SchemaRef as ArrowSchemaRef};
23
// Extension types defined by Databend.
//
// Every item in this group is compiled only when the `flight-sql` feature is
// enabled. The Arrow-field conversion in this file looks up `EXTENSION_KEY` in
// an Arrow field's metadata and matches the stored value against the
// `ARROW_EXT_TYPE_*` names below to pick the corresponding `DataType`.

/// Arrow field metadata key whose value names the Databend extension type.
#[cfg(feature = "flight-sql")]
pub(crate) const EXTENSION_KEY: &str = "Extension";
/// Extension name mapped to `DataType::EmptyArray`.
#[cfg(feature = "flight-sql")]
pub(crate) const ARROW_EXT_TYPE_EMPTY_ARRAY: &str = "EmptyArray";
/// Extension name mapped to `DataType::EmptyMap`.
#[cfg(feature = "flight-sql")]
pub(crate) const ARROW_EXT_TYPE_EMPTY_MAP: &str = "EmptyMap";
/// Extension name mapped to `DataType::Variant`.
#[cfg(feature = "flight-sql")]
pub(crate) const ARROW_EXT_TYPE_VARIANT: &str = "Variant";
/// Extension name mapped to `DataType::Bitmap`.
#[cfg(feature = "flight-sql")]
pub(crate) const ARROW_EXT_TYPE_BITMAP: &str = "Bitmap";
/// Extension name mapped to `DataType::Geometry`.
#[cfg(feature = "flight-sql")]
pub(crate) const ARROW_EXT_TYPE_GEOMETRY: &str = "Geometry";
/// Extension name mapped to `DataType::Geography`.
#[cfg(feature = "flight-sql")]
pub(crate) const ARROW_EXT_TYPE_GEOGRAPHY: &str = "Geography";
/// Extension name mapped to `DataType::Interval`.
#[cfg(feature = "flight-sql")]
pub(crate) const ARROW_EXT_TYPE_INTERVAL: &str = "Interval";
/// Extension name mapped to `DataType::Vector`; the Arrow conversion only
/// accepts it on a `FixedSizeList` whose item type is `Float32`.
#[cfg(feature = "flight-sql")]
pub(crate) const ARROW_EXT_TYPE_VECTOR: &str = "Vector";
43
/// Numeric type tags carried by `DataType::Number`.
///
/// Each variant corresponds to the type name of the same spelling accepted by
/// the type-description conversion (for example `"UInt8"` or `"Float64"`), and
/// `DataType`'s `Display` prints that same spelling.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NumberDataType {
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    Int8,
    Int16,
    Int32,
    Int64,
    Float32,
    Float64,
}
57
/// Precision/scale pair attached to a decimal type.
///
/// `DataType`'s `Display` renders it as `Decimal(precision, scale)`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DecimalSize {
    /// First argument of a `Decimal(p, s)` type description.
    pub precision: u8,
    /// Second argument of a `Decimal(p, s)` type description.
    pub scale: u8,
}
63
/// Decimal type together with its `DecimalSize`.
///
/// When a `Decimal(p, s)` type description is converted in this file, a
/// precision of at most 38 yields `Decimal128`; anything larger yields
/// `Decimal256`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecimalDataType {
    Decimal128(DecimalSize),
    Decimal256(DecimalSize),
}
69
70impl DecimalDataType {
71    pub fn decimal_size(&self) -> &DecimalSize {
72        match self {
73            DecimalDataType::Decimal128(size) => size,
74            DecimalDataType::Decimal256(size) => size,
75        }
76    }
77}
78
/// Type of a `Field`.
///
/// Values are produced in this file from parsed textual type descriptions
/// (`TryFrom<&TypeDesc>`) and, with the `flight-sql` feature, from Arrow
/// fields. `Display` prints each variant under its own name.
#[derive(Debug, Clone)]
pub enum DataType {
    Null,
    /// Produced for the description `Array(Nothing)` or the `EmptyArray`
    /// Arrow extension.
    EmptyArray,
    /// Produced for the description `Map(Nothing)` or the `EmptyMap` Arrow
    /// extension.
    EmptyMap,
    Boolean,
    Binary,
    String,
    Number(NumberDataType),
    Decimal(DecimalDataType),
    Timestamp,
    Date,
    /// Wrapper marking the inner type as nullable.
    Nullable(Box<DataType>),
    /// Array whose items have the boxed type.
    Array(Box<DataType>),
    /// Map type. The type-description conversion stores the key and value
    /// types as a two-element `Tuple` (`[key, value]`) inside the box, and
    /// `Display` formats that shape as `Map(key, value)`.
    Map(Box<DataType>),
    /// Tuple of the listed element types, in order.
    Tuple(Vec<DataType>),
    Variant,
    Bitmap,
    Geometry,
    Geography,
    Interval,
    /// Vector carrying its dimension (the argument of `Vector(n)`).
    Vector(u64),
    // Generic(usize),
}
103
104impl DataType {
105    pub fn is_numeric(&self) -> bool {
106        match self {
107            DataType::Number(_) | DataType::Decimal(_) => true,
108            DataType::Nullable(inner) => inner.is_numeric(),
109            _ => false,
110        }
111    }
112}
113
114impl std::fmt::Display for DataType {
115    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
116        match self {
117            DataType::Null => write!(f, "Null"),
118            DataType::EmptyArray => write!(f, "EmptyArray"),
119            DataType::EmptyMap => write!(f, "EmptyMap"),
120            DataType::Boolean => write!(f, "Boolean"),
121            DataType::Binary => write!(f, "Binary"),
122            DataType::String => write!(f, "String"),
123            DataType::Number(n) => match n {
124                NumberDataType::UInt8 => write!(f, "UInt8"),
125                NumberDataType::UInt16 => write!(f, "UInt16"),
126                NumberDataType::UInt32 => write!(f, "UInt32"),
127                NumberDataType::UInt64 => write!(f, "UInt64"),
128                NumberDataType::Int8 => write!(f, "Int8"),
129                NumberDataType::Int16 => write!(f, "Int16"),
130                NumberDataType::Int32 => write!(f, "Int32"),
131                NumberDataType::Int64 => write!(f, "Int64"),
132                NumberDataType::Float32 => write!(f, "Float32"),
133                NumberDataType::Float64 => write!(f, "Float64"),
134            },
135            DataType::Decimal(d) => {
136                let size = d.decimal_size();
137                write!(f, "Decimal({}, {})", size.precision, size.scale)
138            }
139            DataType::Timestamp => write!(f, "Timestamp"),
140            DataType::Date => write!(f, "Date"),
141            DataType::Nullable(inner) => write!(f, "Nullable({inner})"),
142            DataType::Array(inner) => write!(f, "Array({inner})"),
143            DataType::Map(inner) => match inner.as_ref() {
144                DataType::Tuple(tys) => {
145                    write!(f, "Map({}, {})", tys[0], tys[1])
146                }
147                _ => unreachable!(),
148            },
149            DataType::Tuple(inner) => {
150                let inner = inner
151                    .iter()
152                    .map(|x| x.to_string())
153                    .collect::<Vec<_>>()
154                    .join(", ");
155                write!(f, "Tuple({inner})")
156            }
157            DataType::Variant => write!(f, "Variant"),
158            DataType::Bitmap => write!(f, "Bitmap"),
159            DataType::Geometry => write!(f, "Geometry"),
160            DataType::Geography => write!(f, "Geography"),
161            DataType::Interval => write!(f, "Interval"),
162            DataType::Vector(d) => write!(f, "Vector({d})"),
163        }
164    }
165}
166
/// A named entry of a `Schema`: a name paired with its `DataType`.
#[derive(Debug, Clone)]
pub struct Field {
    /// Field name, copied from the source field description.
    pub name: String,
    /// Type of the field.
    pub data_type: DataType,
}
172
/// A list of `Field`s, kept in the order in which they were supplied.
///
/// The default value is an empty schema.
#[derive(Debug, Clone, Default)]
pub struct Schema(Vec<Field>);

/// Shared (`Arc`) handle to a `Schema`.
pub type SchemaRef = Arc<Schema>;
177
178impl Schema {
179    pub fn fields(&self) -> &[Field] {
180        &self.0
181    }
182
183    pub fn from_vec(fields: Vec<Field>) -> Self {
184        Self(fields)
185    }
186}
187
188impl TryFrom<&TypeDesc<'_>> for DataType {
189    type Error = Error;
190
191    fn try_from(desc: &TypeDesc) -> Result<Self> {
192        if desc.nullable {
193            let mut desc = desc.clone();
194            desc.nullable = false;
195            let inner = DataType::try_from(&desc)?;
196            return Ok(DataType::Nullable(Box::new(inner)));
197        }
198        let dt = match desc.name {
199            "NULL" | "Null" => DataType::Null,
200            "Boolean" => DataType::Boolean,
201            "Binary" => DataType::Binary,
202            "String" => DataType::String,
203            "Int8" => DataType::Number(NumberDataType::Int8),
204            "Int16" => DataType::Number(NumberDataType::Int16),
205            "Int32" => DataType::Number(NumberDataType::Int32),
206            "Int64" => DataType::Number(NumberDataType::Int64),
207            "UInt8" => DataType::Number(NumberDataType::UInt8),
208            "UInt16" => DataType::Number(NumberDataType::UInt16),
209            "UInt32" => DataType::Number(NumberDataType::UInt32),
210            "UInt64" => DataType::Number(NumberDataType::UInt64),
211            "Float32" => DataType::Number(NumberDataType::Float32),
212            "Float64" => DataType::Number(NumberDataType::Float64),
213            "Decimal" => {
214                let precision = desc.args[0].name.parse::<u8>()?;
215                let scale = desc.args[1].name.parse::<u8>()?;
216
217                if precision <= 38 {
218                    DataType::Decimal(DecimalDataType::Decimal128(DecimalSize {
219                        precision,
220                        scale,
221                    }))
222                } else {
223                    DataType::Decimal(DecimalDataType::Decimal256(DecimalSize {
224                        precision,
225                        scale,
226                    }))
227                }
228            }
229            "Timestamp" => DataType::Timestamp,
230            "Date" => DataType::Date,
231            "Nullable" => {
232                if desc.args.len() != 1 {
233                    return Err(Error::Parsing(
234                        "Nullable type must have one argument".to_string(),
235                    ));
236                }
237                let mut desc = desc.clone();
238                // ignore inner NULL indicator
239                desc.nullable = false;
240                let inner = Self::try_from(&desc.args[0])?;
241                DataType::Nullable(Box::new(inner))
242            }
243            "Array" => {
244                if desc.args.len() != 1 {
245                    return Err(Error::Parsing(
246                        "Array type must have one argument".to_string(),
247                    ));
248                }
249                if desc.args[0].name == "Nothing" {
250                    DataType::EmptyArray
251                } else {
252                    let inner = Self::try_from(&desc.args[0])?;
253                    DataType::Array(Box::new(inner))
254                }
255            }
256            "Map" => {
257                if desc.args.len() == 1 && desc.args[0].name == "Nothing" {
258                    DataType::EmptyMap
259                } else {
260                    if desc.args.len() != 2 {
261                        return Err(Error::Parsing(
262                            "Map type must have two arguments".to_string(),
263                        ));
264                    }
265                    let key_ty = Self::try_from(&desc.args[0])?;
266                    let val_ty = Self::try_from(&desc.args[1])?;
267                    DataType::Map(Box::new(DataType::Tuple(vec![key_ty, val_ty])))
268                }
269            }
270            "Tuple" => {
271                let mut inner = vec![];
272                for arg in &desc.args {
273                    inner.push(Self::try_from(arg)?);
274                }
275                DataType::Tuple(inner)
276            }
277            "Variant" => DataType::Variant,
278            "Bitmap" => DataType::Bitmap,
279            "Geometry" => DataType::Geometry,
280            "Geography" => DataType::Geography,
281            "Interval" => DataType::Interval,
282            "Vector" => {
283                let dimension = desc.args[0].name.parse::<u64>()?;
284                DataType::Vector(dimension)
285            }
286            _ => return Err(Error::Parsing(format!("Unknown type: {desc:?}"))),
287        };
288        Ok(dt)
289    }
290}
291
292impl TryFrom<APISchemaField> for Field {
293    type Error = Error;
294
295    fn try_from(f: APISchemaField) -> Result<Self> {
296        let type_desc = parse_type_desc(&f.data_type)?;
297        let dt = DataType::try_from(&type_desc)?;
298        let field = Self {
299            name: f.name,
300            data_type: dt,
301        };
302        Ok(field)
303    }
304}
305
306impl TryFrom<Vec<APISchemaField>> for Schema {
307    type Error = Error;
308
309    fn try_from(fields: Vec<APISchemaField>) -> Result<Self> {
310        let fields = fields
311            .into_iter()
312            .map(Field::try_from)
313            .collect::<Result<Vec<_>>>()?;
314        Ok(Self(fields))
315    }
316}
317
#[cfg(feature = "flight-sql")]
impl TryFrom<&Arc<ArrowField>> for Field {
    type Error = Error;

    /// Converts an Arrow field into a `Field`.
    ///
    /// If the field's metadata contains `EXTENSION_KEY`, the named Databend
    /// extension type decides the result; otherwise the Arrow data type is
    /// mapped directly. A nullable Arrow field is wrapped in
    /// `DataType::Nullable`, except when the type is `Null`.
    ///
    /// # Errors
    ///
    /// Returns `Error::Parsing` for unsupported extension names or Arrow
    /// types, for a negative decimal scale, and for a negative vector size.
    fn try_from(f: &Arc<ArrowField>) -> Result<Self> {
        let mut dt = if let Some(extend_type) = f.metadata().get(EXTENSION_KEY) {
            match extend_type.as_str() {
                ARROW_EXT_TYPE_EMPTY_ARRAY => DataType::EmptyArray,
                ARROW_EXT_TYPE_EMPTY_MAP => DataType::EmptyMap,
                ARROW_EXT_TYPE_VARIANT => DataType::Variant,
                ARROW_EXT_TYPE_BITMAP => DataType::Bitmap,
                ARROW_EXT_TYPE_GEOMETRY => DataType::Geometry,
                ARROW_EXT_TYPE_GEOGRAPHY => DataType::Geography,
                ARROW_EXT_TYPE_INTERVAL => DataType::Interval,
                ARROW_EXT_TYPE_VECTOR => match f.data_type() {
                    ArrowDataType::FixedSizeList(item, dimension) => {
                        // Only Float32 vectors are supported.
                        if !matches!(item.data_type(), ArrowDataType::Float32) {
                            return Err(Error::Parsing(format!(
                                "Unsupported FixedSizeList Arrow type: {:?}",
                                item.data_type()
                            )));
                        }
                        // The list size is signed; reject a negative value
                        // instead of letting an `as` cast wrap it.
                        let dimension = u64::try_from(*dimension).map_err(|_| {
                            Error::Parsing(format!(
                                "Invalid FixedSizeList size for vector: {dimension}"
                            ))
                        })?;
                        DataType::Vector(dimension)
                    }
                    arrow_type => {
                        return Err(Error::Parsing(format!(
                            "Unsupported Arrow type: {arrow_type:?}",
                        )));
                    }
                },
                _ => {
                    return Err(Error::Parsing(format!(
                        "Unsupported extension datatype for arrow field: {f:?}"
                    )))
                }
            }
        } else {
            match f.data_type() {
                ArrowDataType::Null => DataType::Null,
                ArrowDataType::Boolean => DataType::Boolean,
                ArrowDataType::Int8 => DataType::Number(NumberDataType::Int8),
                ArrowDataType::Int16 => DataType::Number(NumberDataType::Int16),
                ArrowDataType::Int32 => DataType::Number(NumberDataType::Int32),
                ArrowDataType::Int64 => DataType::Number(NumberDataType::Int64),
                ArrowDataType::UInt8 => DataType::Number(NumberDataType::UInt8),
                ArrowDataType::UInt16 => DataType::Number(NumberDataType::UInt16),
                ArrowDataType::UInt32 => DataType::Number(NumberDataType::UInt32),
                ArrowDataType::UInt64 => DataType::Number(NumberDataType::UInt64),
                ArrowDataType::Float32 => DataType::Number(NumberDataType::Float32),
                ArrowDataType::Float64 => DataType::Number(NumberDataType::Float64),
                ArrowDataType::Binary
                | ArrowDataType::LargeBinary
                | ArrowDataType::FixedSizeBinary(_) => DataType::Binary,
                ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View => {
                    DataType::String
                }
                ArrowDataType::Timestamp(_, _) => DataType::Timestamp,
                ArrowDataType::Date32 => DataType::Date,
                ArrowDataType::Decimal128(p, s) => {
                    DataType::Decimal(DecimalDataType::Decimal128(arrow_decimal_size(*p, *s)?))
                }
                ArrowDataType::Decimal256(p, s) => {
                    DataType::Decimal(DecimalDataType::Decimal256(arrow_decimal_size(*p, *s)?))
                }
                ArrowDataType::List(item) | ArrowDataType::LargeList(item) => {
                    let item_ty = Field::try_from(item)?.data_type;
                    DataType::Array(Box::new(item_ty))
                }
                ArrowDataType::Map(entries, _) => {
                    // The converted entries field becomes the map's inner type.
                    let entries_ty = Field::try_from(entries)?.data_type;
                    DataType::Map(Box::new(entries_ty))
                }
                ArrowDataType::Struct(members) => {
                    let mut inner_tys = Vec::with_capacity(members.len());
                    for member in members {
                        inner_tys.push(Field::try_from(member)?.data_type);
                    }
                    DataType::Tuple(inner_tys)
                }
                _ => {
                    return Err(Error::Parsing(format!(
                        "Unsupported datatype for arrow field: {f:?}"
                    )))
                }
            }
        };
        // `Null` is never wrapped; every other nullable field is.
        if f.is_nullable() && !matches!(dt, DataType::Null) {
            dt = DataType::Nullable(Box::new(dt));
        }
        Ok(Field {
            name: f.name().to_string(),
            data_type: dt,
        })
    }
}

/// Builds a `DecimalSize` from Arrow's decimal parameters, rejecting a
/// negative scale (which `DecimalSize` cannot represent) instead of wrapping it.
#[cfg(feature = "flight-sql")]
fn arrow_decimal_size(precision: u8, scale: i8) -> Result<DecimalSize> {
    let scale = u8::try_from(scale).map_err(|_| {
        Error::Parsing(format!("Unsupported negative decimal scale: {scale}"))
    })?;
    Ok(DecimalSize { precision, scale })
}
426
#[cfg(feature = "flight-sql")]
impl TryFrom<ArrowSchemaRef> for Schema {
    type Error = Error;

    /// Converts each Arrow field of the schema in order, returning the first
    /// conversion error encountered.
    fn try_from(schema_ref: ArrowSchemaRef) -> Result<Self> {
        let arrow_fields = schema_ref.fields();
        let mut converted = Vec::with_capacity(arrow_fields.len());
        for arrow_field in arrow_fields.iter() {
            converted.push(Field::try_from(arrow_field)?);
        }
        Ok(Schema(converted))
    }
}
440
/// Parsed form of a textual type description such as
/// `Map(String, Array(Int64) NULL)`, borrowing its names from the input.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TypeDesc<'t> {
    /// Text before the opening parenthesis (or the whole token when there are
    /// no arguments); numeric arguments such as the `42` in `Decimal(42, 42)`
    /// are stored here too.
    name: &'t str,
    /// Set when the description is followed by a trailing ` NULL`.
    nullable: bool,
    /// Parsed arguments from inside the parentheses, in order.
    args: Vec<TypeDesc<'t>>,
}
447
448fn parse_type_desc(s: &str) -> Result<TypeDesc<'_>> {
449    let mut name = "";
450    let mut args = vec![];
451    let mut depth = 0;
452    let mut start = 0;
453    let mut nullable = false;
454    for (i, c) in s.char_indices() {
455        match c {
456            '(' => {
457                if depth == 0 {
458                    name = &s[start..i];
459                    start = i + 1;
460                }
461                depth += 1;
462            }
463            ')' => {
464                depth -= 1;
465                if depth == 0 {
466                    let s = &s[start..i];
467                    if !s.is_empty() {
468                        args.push(parse_type_desc(s)?);
469                    }
470                    start = i + 1;
471                }
472            }
473            ',' => {
474                if depth == 1 {
475                    let s = &s[start..i];
476                    args.push(parse_type_desc(s)?);
477                    start = i + 1;
478                }
479            }
480            ' ' => {
481                if depth == 0 {
482                    let s = &s[start..i];
483                    if !s.is_empty() {
484                        name = s;
485                    }
486                    start = i + 1;
487                }
488            }
489            _ => {}
490        }
491    }
492    if depth != 0 {
493        return Err(Error::Parsing(format!("Invalid type desc: {s}")));
494    }
495    if start < s.len() {
496        let s = &s[start..];
497        if !s.is_empty() {
498            if name.is_empty() {
499                name = s;
500            } else if s == "NULL" {
501                nullable = true;
502            } else {
503                return Err(Error::Parsing(format!("Invalid type arg for {name}: {s}")));
504            }
505        }
506    }
507    Ok(TypeDesc {
508        name,
509        nullable,
510        args,
511    })
512}
513
#[cfg(test)]
mod test {
    use std::vec;

    use super::*;

    /// Non-nullable descriptor with the given arguments.
    fn node<'t>(name: &'t str, args: Vec<TypeDesc<'t>>) -> TypeDesc<'t> {
        TypeDesc {
            name,
            nullable: false,
            args,
        }
    }

    /// Non-nullable descriptor without arguments.
    fn leaf(name: &str) -> TypeDesc<'_> {
        node(name, vec![])
    }

    /// Same descriptor with the nullable flag set.
    fn nullable(desc: TypeDesc<'_>) -> TypeDesc<'_> {
        TypeDesc {
            nullable: true,
            ..desc
        }
    }

    /// Parses every `(description, input, expected)` case and compares trees.
    fn check<'t>(cases: Vec<(&'t str, &'t str, TypeDesc<'t>)>) {
        for (desc, input, expected) in cases {
            let output = parse_type_desc(input).unwrap();
            assert_eq!(output, expected, "{}", desc);
        }
    }

    #[test]
    fn test_parse_type_desc() {
        check(vec![
            ("plain type", "String", leaf("String")),
            (
                "decimal type",
                "Decimal(42, 42)",
                node("Decimal", vec![leaf("42"), leaf("42")]),
            ),
            (
                "nullable type",
                "Nullable(Nothing)",
                node("Nullable", vec![leaf("Nothing")]),
            ),
            ("empty arg", "DateTime()", leaf("DateTime")),
            (
                "numeric arg",
                "FixedString(42)",
                node("FixedString", vec![leaf("42")]),
            ),
            (
                "multiple args",
                "Array(Tuple(Tuple(String, String), Tuple(String, UInt64)))",
                node(
                    "Array",
                    vec![node(
                        "Tuple",
                        vec![
                            node("Tuple", vec![leaf("String"), leaf("String")]),
                            node("Tuple", vec![leaf("String"), leaf("UInt64")]),
                        ],
                    )],
                ),
            ),
            (
                "map args",
                "Map(String, Array(Int64))",
                node(
                    "Map",
                    vec![leaf("String"), node("Array", vec![leaf("Int64")])],
                ),
            ),
            (
                "map nullable value args",
                "Nullable(Map(String, String NULL))",
                node(
                    "Nullable",
                    vec![node(
                        "Map",
                        vec![leaf("String"), nullable(leaf("String"))],
                    )],
                ),
            ),
        ]);
    }

    #[test]
    fn test_parse_complex_type_with_null() {
        check(vec![(
            "complex nullable type",
            "Nullable(Tuple(String NULL, Array(Tuple(Array(Int32 NULL) NULL, Array(String NULL) NULL) NULL) NULL))",
            node(
                "Nullable",
                vec![node(
                    "Tuple",
                    vec![
                        nullable(leaf("String")),
                        nullable(node(
                            "Array",
                            vec![nullable(node(
                                "Tuple",
                                vec![
                                    nullable(node("Array", vec![nullable(leaf("Int32"))])),
                                    nullable(node("Array", vec![nullable(leaf("String"))])),
                                ],
                            ))],
                        )),
                    ],
                )],
            ),
        )]);
    }
}