databend_driver_core/
schema.rs

1// Copyright 2021 Datafuse Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use databend_client::SchemaField as APISchemaField;
18
19use crate::error::{Error, Result};
20
21#[cfg(feature = "flight-sql")]
22use arrow_schema::{DataType as ArrowDataType, Field as ArrowField, SchemaRef as ArrowSchemaRef};
23
// Extension types defined by Databend.
//
// When an Arrow field carries an `EXTENSION_KEY` metadata entry, the entry's
// value (one of the `ARROW_EXT_TYPE_*` names below) identifies the Databend
// type of the field.

/// Metadata key looked up on an Arrow field to find its Databend extension type name.
#[cfg(feature = "flight-sql")]
pub(crate) const EXTENSION_KEY: &str = "Extension";
/// Extension type name for an empty array.
#[cfg(feature = "flight-sql")]
pub(crate) const ARROW_EXT_TYPE_EMPTY_ARRAY: &str = "EmptyArray";
/// Extension type name for an empty map.
#[cfg(feature = "flight-sql")]
pub(crate) const ARROW_EXT_TYPE_EMPTY_MAP: &str = "EmptyMap";
/// Extension type name for a variant value.
#[cfg(feature = "flight-sql")]
pub(crate) const ARROW_EXT_TYPE_VARIANT: &str = "Variant";
/// Extension type name for a bitmap value.
#[cfg(feature = "flight-sql")]
pub(crate) const ARROW_EXT_TYPE_BITMAP: &str = "Bitmap";
/// Extension type name for a geometry value.
#[cfg(feature = "flight-sql")]
pub(crate) const ARROW_EXT_TYPE_GEOMETRY: &str = "Geometry";
/// Extension type name for a geography value.
#[cfg(feature = "flight-sql")]
pub(crate) const ARROW_EXT_TYPE_GEOGRAPHY: &str = "Geography";
/// Extension type name for an interval value.
#[cfg(feature = "flight-sql")]
pub(crate) const ARROW_EXT_TYPE_INTERVAL: &str = "Interval";
41
/// The integer and floating-point kinds carried by [`DataType::Number`].
///
/// Each variant is displayed and parsed under its own name (e.g. `"UInt8"`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NumberDataType {
    // Unsigned integers.
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    // Signed integers.
    Int8,
    Int16,
    Int32,
    Int64,
    // Floating-point numbers.
    Float32,
    Float64,
}
55
/// Precision and scale of a decimal type, i.e. the two arguments of
/// `Decimal(precision, scale)`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DecimalSize {
    /// First argument of `Decimal(precision, scale)`.
    pub precision: u8,
    /// Second argument of `Decimal(precision, scale)`.
    pub scale: u8,
}
61
/// Decimal type, split by storage width.
///
/// When converted from a type string, a precision of at most 38 selects
/// `Decimal128`; anything larger selects `Decimal256`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecimalDataType {
    /// 128-bit decimal with the given precision and scale.
    Decimal128(DecimalSize),
    /// 256-bit decimal with the given precision and scale.
    Decimal256(DecimalSize),
}
67
68impl DecimalDataType {
69    pub fn decimal_size(&self) -> &DecimalSize {
70        match self {
71            DecimalDataType::Decimal128(size) => size,
72            DecimalDataType::Decimal256(size) => size,
73        }
74    }
75}
76
/// Driver-side description of a value's type.
///
/// Values are built from the textual type names returned with a schema
/// (see the `TryFrom<&TypeDesc>` impl) or, with the `flight-sql` feature,
/// from Arrow fields.
#[derive(Debug, Clone)]
pub enum DataType {
    /// Parsed from the type names `NULL` / `Null`.
    Null,
    /// Parsed from `Array(Nothing)`.
    EmptyArray,
    /// Parsed from `Map(Nothing)`.
    EmptyMap,
    Boolean,
    Binary,
    String,
    /// Integer or floating-point number of the given kind.
    Number(NumberDataType),
    /// Decimal with its precision and scale.
    Decimal(DecimalDataType),
    Timestamp,
    Date,
    /// The inner type, additionally admitting NULL.
    Nullable(Box<DataType>),
    /// Array whose elements have the inner type.
    Array(Box<DataType>),
    /// Map; the inner type is expected to be a two-element `Tuple` holding
    /// the key type followed by the value type.
    Map(Box<DataType>),
    /// Tuple of the listed element types, in order.
    Tuple(Vec<DataType>),
    Variant,
    Bitmap,
    Geometry,
    Geography,
    Interval,
    // Generic(usize),
}
100
101impl DataType {
102    pub fn is_numeric(&self) -> bool {
103        match self {
104            DataType::Number(_) | DataType::Decimal(_) => true,
105            DataType::Nullable(inner) => inner.is_numeric(),
106            _ => false,
107        }
108    }
109}
110
111impl std::fmt::Display for DataType {
112    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
113        match self {
114            DataType::Null => write!(f, "Null"),
115            DataType::EmptyArray => write!(f, "EmptyArray"),
116            DataType::EmptyMap => write!(f, "EmptyMap"),
117            DataType::Boolean => write!(f, "Boolean"),
118            DataType::Binary => write!(f, "Binary"),
119            DataType::String => write!(f, "String"),
120            DataType::Number(n) => match n {
121                NumberDataType::UInt8 => write!(f, "UInt8"),
122                NumberDataType::UInt16 => write!(f, "UInt16"),
123                NumberDataType::UInt32 => write!(f, "UInt32"),
124                NumberDataType::UInt64 => write!(f, "UInt64"),
125                NumberDataType::Int8 => write!(f, "Int8"),
126                NumberDataType::Int16 => write!(f, "Int16"),
127                NumberDataType::Int32 => write!(f, "Int32"),
128                NumberDataType::Int64 => write!(f, "Int64"),
129                NumberDataType::Float32 => write!(f, "Float32"),
130                NumberDataType::Float64 => write!(f, "Float64"),
131            },
132            DataType::Decimal(d) => {
133                let size = d.decimal_size();
134                write!(f, "Decimal({}, {})", size.precision, size.scale)
135            }
136            DataType::Timestamp => write!(f, "Timestamp"),
137            DataType::Date => write!(f, "Date"),
138            DataType::Nullable(inner) => write!(f, "Nullable({inner})"),
139            DataType::Array(inner) => write!(f, "Array({inner})"),
140            DataType::Map(inner) => match inner.as_ref() {
141                DataType::Tuple(tys) => {
142                    write!(f, "Map({}, {})", tys[0], tys[1])
143                }
144                _ => unreachable!(),
145            },
146            DataType::Tuple(inner) => {
147                let inner = inner
148                    .iter()
149                    .map(|x| x.to_string())
150                    .collect::<Vec<_>>()
151                    .join(", ");
152                write!(f, "Tuple({inner})")
153            }
154            DataType::Variant => write!(f, "Variant"),
155            DataType::Bitmap => write!(f, "Bitmap"),
156            DataType::Geometry => write!(f, "Geometry"),
157            DataType::Geography => write!(f, "Geography"),
158            DataType::Interval => write!(f, "Interval"),
159        }
160    }
161}
162
/// A named, typed entry of a [`Schema`].
#[derive(Debug, Clone)]
pub struct Field {
    /// Field name as reported by the source schema.
    pub name: String,
    /// Type of the values held by this field.
    pub data_type: DataType,
}
168
/// An ordered list of [`Field`]s; the default value is the empty schema.
#[derive(Debug, Clone, Default)]
pub struct Schema(Vec<Field>);

/// Reference-counted handle to a [`Schema`].
pub type SchemaRef = Arc<Schema>;
173
174impl Schema {
175    pub fn fields(&self) -> &[Field] {
176        &self.0
177    }
178
179    pub fn from_vec(fields: Vec<Field>) -> Self {
180        Self(fields)
181    }
182}
183
184impl TryFrom<&TypeDesc<'_>> for DataType {
185    type Error = Error;
186
187    fn try_from(desc: &TypeDesc) -> Result<Self> {
188        if desc.nullable {
189            let mut desc = desc.clone();
190            desc.nullable = false;
191            let inner = DataType::try_from(&desc)?;
192            return Ok(DataType::Nullable(Box::new(inner)));
193        }
194        let dt = match desc.name {
195            "NULL" | "Null" => DataType::Null,
196            "Boolean" => DataType::Boolean,
197            "Binary" => DataType::Binary,
198            "String" => DataType::String,
199            "Int8" => DataType::Number(NumberDataType::Int8),
200            "Int16" => DataType::Number(NumberDataType::Int16),
201            "Int32" => DataType::Number(NumberDataType::Int32),
202            "Int64" => DataType::Number(NumberDataType::Int64),
203            "UInt8" => DataType::Number(NumberDataType::UInt8),
204            "UInt16" => DataType::Number(NumberDataType::UInt16),
205            "UInt32" => DataType::Number(NumberDataType::UInt32),
206            "UInt64" => DataType::Number(NumberDataType::UInt64),
207            "Float32" => DataType::Number(NumberDataType::Float32),
208            "Float64" => DataType::Number(NumberDataType::Float64),
209            "Decimal" => {
210                let precision = desc.args[0].name.parse::<u8>()?;
211                let scale = desc.args[1].name.parse::<u8>()?;
212
213                if precision <= 38 {
214                    DataType::Decimal(DecimalDataType::Decimal128(DecimalSize {
215                        precision,
216                        scale,
217                    }))
218                } else {
219                    DataType::Decimal(DecimalDataType::Decimal256(DecimalSize {
220                        precision,
221                        scale,
222                    }))
223                }
224            }
225            "Timestamp" => DataType::Timestamp,
226            "Date" => DataType::Date,
227            "Nullable" => {
228                if desc.args.len() != 1 {
229                    return Err(Error::Parsing(
230                        "Nullable type must have one argument".to_string(),
231                    ));
232                }
233                let mut desc = desc.clone();
234                // ignore inner NULL indicator
235                desc.nullable = false;
236                let inner = Self::try_from(&desc.args[0])?;
237                DataType::Nullable(Box::new(inner))
238            }
239            "Array" => {
240                if desc.args.len() != 1 {
241                    return Err(Error::Parsing(
242                        "Array type must have one argument".to_string(),
243                    ));
244                }
245                if desc.args[0].name == "Nothing" {
246                    DataType::EmptyArray
247                } else {
248                    let inner = Self::try_from(&desc.args[0])?;
249                    DataType::Array(Box::new(inner))
250                }
251            }
252            "Map" => {
253                if desc.args.len() == 1 && desc.args[0].name == "Nothing" {
254                    DataType::EmptyMap
255                } else {
256                    if desc.args.len() != 2 {
257                        return Err(Error::Parsing(
258                            "Map type must have two arguments".to_string(),
259                        ));
260                    }
261                    let key_ty = Self::try_from(&desc.args[0])?;
262                    let val_ty = Self::try_from(&desc.args[1])?;
263                    DataType::Map(Box::new(DataType::Tuple(vec![key_ty, val_ty])))
264                }
265            }
266            "Tuple" => {
267                let mut inner = vec![];
268                for arg in &desc.args {
269                    inner.push(Self::try_from(arg)?);
270                }
271                DataType::Tuple(inner)
272            }
273            "Variant" => DataType::Variant,
274            "Bitmap" => DataType::Bitmap,
275            "Geometry" => DataType::Geometry,
276            "Geography" => DataType::Geography,
277            "Interval" => DataType::Interval,
278            _ => return Err(Error::Parsing(format!("Unknown type: {desc:?}"))),
279        };
280        Ok(dt)
281    }
282}
283
284impl TryFrom<APISchemaField> for Field {
285    type Error = Error;
286
287    fn try_from(f: APISchemaField) -> Result<Self> {
288        let type_desc = parse_type_desc(&f.data_type)?;
289        let dt = DataType::try_from(&type_desc)?;
290        let field = Self {
291            name: f.name,
292            data_type: dt,
293        };
294        Ok(field)
295    }
296}
297
298impl TryFrom<Vec<APISchemaField>> for Schema {
299    type Error = Error;
300
301    fn try_from(fields: Vec<APISchemaField>) -> Result<Self> {
302        let fields = fields
303            .into_iter()
304            .map(Field::try_from)
305            .collect::<Result<Vec<_>>>()?;
306        Ok(Self(fields))
307    }
308}
309
#[cfg(feature = "flight-sql")]
impl TryFrom<&Arc<ArrowField>> for Field {
    type Error = Error;

    /// Converts an Arrow field into a driver [`Field`].
    ///
    /// If the field's metadata contains `EXTENSION_KEY`, the metadata value
    /// alone decides the type. Otherwise the Arrow data type is mapped,
    /// recursing into list, map and struct children. A nullable field is
    /// wrapped in `DataType::Nullable`, except when its type is `Null`.
    ///
    /// # Errors
    ///
    /// Returns `Error::Parsing` for an unknown extension name, an unsupported
    /// Arrow data type, or a decimal with a negative scale (which
    /// `DecimalSize` cannot represent).
    fn try_from(f: &Arc<ArrowField>) -> Result<Self> {
        // Arrow scales are signed; reject negative ones rather than letting
        // an `as u8` cast wrap them into a large bogus scale.
        let decimal_size = |precision: u8, scale: i8| -> Result<DecimalSize> {
            let scale = u8::try_from(scale).map_err(|_| {
                Error::Parsing(format!(
                    "Unsupported negative decimal scale for arrow field: {f:?}"
                ))
            })?;
            Ok(DecimalSize { precision, scale })
        };

        let mut dt = if let Some(extend_type) = f.metadata().get(EXTENSION_KEY) {
            match extend_type.as_str() {
                ARROW_EXT_TYPE_EMPTY_ARRAY => DataType::EmptyArray,
                ARROW_EXT_TYPE_EMPTY_MAP => DataType::EmptyMap,
                ARROW_EXT_TYPE_VARIANT => DataType::Variant,
                ARROW_EXT_TYPE_BITMAP => DataType::Bitmap,
                ARROW_EXT_TYPE_GEOMETRY => DataType::Geometry,
                ARROW_EXT_TYPE_GEOGRAPHY => DataType::Geography,
                ARROW_EXT_TYPE_INTERVAL => DataType::Interval,
                _ => {
                    return Err(Error::Parsing(format!(
                        "Unsupported extension datatype for arrow field: {f:?}"
                    )))
                }
            }
        } else {
            match f.data_type() {
                ArrowDataType::Null => DataType::Null,
                ArrowDataType::Boolean => DataType::Boolean,
                ArrowDataType::Int8 => DataType::Number(NumberDataType::Int8),
                ArrowDataType::Int16 => DataType::Number(NumberDataType::Int16),
                ArrowDataType::Int32 => DataType::Number(NumberDataType::Int32),
                ArrowDataType::Int64 => DataType::Number(NumberDataType::Int64),
                ArrowDataType::UInt8 => DataType::Number(NumberDataType::UInt8),
                ArrowDataType::UInt16 => DataType::Number(NumberDataType::UInt16),
                ArrowDataType::UInt32 => DataType::Number(NumberDataType::UInt32),
                ArrowDataType::UInt64 => DataType::Number(NumberDataType::UInt64),
                ArrowDataType::Float32 => DataType::Number(NumberDataType::Float32),
                ArrowDataType::Float64 => DataType::Number(NumberDataType::Float64),
                ArrowDataType::Binary
                | ArrowDataType::LargeBinary
                | ArrowDataType::FixedSizeBinary(_) => DataType::Binary,
                ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View => {
                    DataType::String
                }
                ArrowDataType::Timestamp(_, _) => DataType::Timestamp,
                ArrowDataType::Date32 => DataType::Date,
                ArrowDataType::Decimal128(p, s) => {
                    DataType::Decimal(DecimalDataType::Decimal128(decimal_size(*p, *s)?))
                }
                ArrowDataType::Decimal256(p, s) => {
                    DataType::Decimal(DecimalDataType::Decimal256(decimal_size(*p, *s)?))
                }
                ArrowDataType::List(item) | ArrowDataType::LargeList(item) => {
                    let item_ty = Field::try_from(item)?.data_type;
                    DataType::Array(Box::new(item_ty))
                }
                ArrowDataType::Map(entries, _) => {
                    // The entries child converts to the map's inner type.
                    let entries_ty = Field::try_from(entries)?.data_type;
                    DataType::Map(Box::new(entries_ty))
                }
                ArrowDataType::Struct(children) => {
                    let mut inner_tys = Vec::with_capacity(children.len());
                    for child in children {
                        inner_tys.push(Field::try_from(child)?.data_type);
                    }
                    DataType::Tuple(inner_tys)
                }
                _ => {
                    return Err(Error::Parsing(format!(
                        "Unsupported datatype for arrow field: {f:?}"
                    )))
                }
            }
        };
        // `Null` is never wrapped in `Nullable`.
        if f.is_nullable() && !matches!(dt, DataType::Null) {
            dt = DataType::Nullable(Box::new(dt));
        }
        Ok(Field {
            name: f.name().to_string(),
            data_type: dt,
        })
    }
}
398
#[cfg(feature = "flight-sql")]
impl TryFrom<ArrowSchemaRef> for Schema {
    type Error = Error;

    /// Converts each Arrow field in order, stopping at the first one that
    /// fails to convert.
    fn try_from(schema_ref: ArrowSchemaRef) -> Result<Self> {
        let arrow_fields = schema_ref.fields();
        let mut converted = Vec::with_capacity(arrow_fields.len());
        for arrow_field in arrow_fields.iter() {
            converted.push(Field::try_from(arrow_field)?);
        }
        Ok(Schema(converted))
    }
}
412
/// Parsed form of a textual type such as `Nullable(Map(String, String NULL))`.
///
/// All names are slices of the parsed input string, hence the lifetime.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TypeDesc<'t> {
    /// Text before the opening parenthesis, or the whole token for an
    /// argument-less entry (this includes literal arguments such as `42`).
    name: &'t str,
    /// `true` when the type was followed by a ` NULL` marker.
    nullable: bool,
    /// Comma-separated arguments inside the parentheses, parsed recursively.
    args: Vec<TypeDesc<'t>>,
}
419
420fn parse_type_desc(s: &str) -> Result<TypeDesc> {
421    let mut name = "";
422    let mut args = vec![];
423    let mut depth = 0;
424    let mut start = 0;
425    let mut nullable = false;
426    for (i, c) in s.char_indices() {
427        match c {
428            '(' => {
429                if depth == 0 {
430                    name = &s[start..i];
431                    start = i + 1;
432                }
433                depth += 1;
434            }
435            ')' => {
436                depth -= 1;
437                if depth == 0 {
438                    let s = &s[start..i];
439                    if !s.is_empty() {
440                        args.push(parse_type_desc(s)?);
441                    }
442                    start = i + 1;
443                }
444            }
445            ',' => {
446                if depth == 1 {
447                    let s = &s[start..i];
448                    args.push(parse_type_desc(s)?);
449                    start = i + 1;
450                }
451            }
452            ' ' => {
453                if depth == 0 {
454                    let s = &s[start..i];
455                    if !s.is_empty() {
456                        name = s;
457                    }
458                    start = i + 1;
459                }
460            }
461            _ => {}
462        }
463    }
464    if depth != 0 {
465        return Err(Error::Parsing(format!("Invalid type desc: {s}")));
466    }
467    if start < s.len() {
468        let s = &s[start..];
469        if !s.is_empty() {
470            if name.is_empty() {
471                name = s;
472            } else if s == "NULL" {
473                nullable = true;
474            } else {
475                return Err(Error::Parsing(format!("Invalid type arg for {name}: {s}")));
476            }
477        }
478    }
479    Ok(TypeDesc {
480        name,
481        nullable,
482        args,
483    })
484}
485
#[cfg(test)]
mod test {
    use super::*;

    /// Descriptor without a NULL marker.
    fn ty<'t>(name: &'t str, args: Vec<TypeDesc<'t>>) -> TypeDesc<'t> {
        TypeDesc {
            name,
            nullable: false,
            args,
        }
    }

    /// Descriptor followed by a NULL marker.
    fn ty_null<'t>(name: &'t str, args: Vec<TypeDesc<'t>>) -> TypeDesc<'t> {
        TypeDesc {
            name,
            nullable: true,
            args,
        }
    }

    /// Parses each `(description, input, expected)` case and compares the result.
    fn check(cases: Vec<(&str, &str, TypeDesc<'_>)>) {
        for (desc, input, expected) in cases {
            let output = parse_type_desc(input).unwrap();
            assert_eq!(output, expected, "{}", desc);
        }
    }

    #[test]
    fn test_parse_type_desc() {
        check(vec![
            ("plain type", "String", ty("String", vec![])),
            (
                "decimal type",
                "Decimal(42, 42)",
                ty("Decimal", vec![ty("42", vec![]), ty("42", vec![])]),
            ),
            (
                "nullable type",
                "Nullable(Nothing)",
                ty("Nullable", vec![ty("Nothing", vec![])]),
            ),
            ("empty arg", "DateTime()", ty("DateTime", vec![])),
            (
                "numeric arg",
                "FixedString(42)",
                ty("FixedString", vec![ty("42", vec![])]),
            ),
            (
                "multiple args",
                "Array(Tuple(Tuple(String, String), Tuple(String, UInt64)))",
                ty(
                    "Array",
                    vec![ty(
                        "Tuple",
                        vec![
                            ty("Tuple", vec![ty("String", vec![]), ty("String", vec![])]),
                            ty("Tuple", vec![ty("String", vec![]), ty("UInt64", vec![])]),
                        ],
                    )],
                ),
            ),
            (
                "map args",
                "Map(String, Array(Int64))",
                ty(
                    "Map",
                    vec![
                        ty("String", vec![]),
                        ty("Array", vec![ty("Int64", vec![])]),
                    ],
                ),
            ),
            (
                "map nullable value args",
                "Nullable(Map(String, String NULL))",
                ty(
                    "Nullable",
                    vec![ty(
                        "Map",
                        vec![ty("String", vec![]), ty_null("String", vec![])],
                    )],
                ),
            ),
        ]);
    }

    #[test]
    fn test_parse_complex_type_with_null() {
        check(vec![(
            "complex nullable type",
            "Nullable(Tuple(String NULL, Array(Tuple(Array(Int32 NULL) NULL, Array(String NULL) NULL) NULL) NULL))",
            ty(
                "Nullable",
                vec![ty(
                    "Tuple",
                    vec![
                        ty_null("String", vec![]),
                        ty_null(
                            "Array",
                            vec![ty_null(
                                "Tuple",
                                vec![
                                    ty_null("Array", vec![ty_null("Int32", vec![])]),
                                    ty_null("Array", vec![ty_null("String", vec![])]),
                                ],
                            )],
                        ),
                    ],
                )],
            ),
        )]);
    }
}