1use std::sync::Arc;
16
17use databend_client::SchemaField as APISchemaField;
18
19use crate::error::{Error, Result};
20
21#[cfg(feature = "flight-sql")]
22use arrow_schema::{DataType as ArrowDataType, Field as ArrowField, SchemaRef as ArrowSchemaRef};
23
#[cfg(feature = "flight-sql")]
/// Arrow field-metadata key whose value names the extension type of the field.
pub(crate) const EXTENSION_KEY: &str = "Extension";
#[cfg(feature = "flight-sql")]
/// Extension name mapped to [`DataType::EmptyArray`].
pub(crate) const ARROW_EXT_TYPE_EMPTY_ARRAY: &str = "EmptyArray";
#[cfg(feature = "flight-sql")]
/// Extension name mapped to [`DataType::EmptyMap`].
pub(crate) const ARROW_EXT_TYPE_EMPTY_MAP: &str = "EmptyMap";
#[cfg(feature = "flight-sql")]
/// Extension name mapped to [`DataType::Variant`].
pub(crate) const ARROW_EXT_TYPE_VARIANT: &str = "Variant";
#[cfg(feature = "flight-sql")]
/// Extension name mapped to [`DataType::Bitmap`].
pub(crate) const ARROW_EXT_TYPE_BITMAP: &str = "Bitmap";
#[cfg(feature = "flight-sql")]
/// Extension name mapped to [`DataType::Geometry`].
pub(crate) const ARROW_EXT_TYPE_GEOMETRY: &str = "Geometry";
#[cfg(feature = "flight-sql")]
/// Extension name mapped to [`DataType::Geography`].
pub(crate) const ARROW_EXT_TYPE_GEOGRAPHY: &str = "Geography";
#[cfg(feature = "flight-sql")]
/// Extension name mapped to [`DataType::Interval`].
pub(crate) const ARROW_EXT_TYPE_INTERVAL: &str = "Interval";
#[cfg(feature = "flight-sql")]
/// Extension name mapped to [`DataType::Vector`]; the Arrow field must be a
/// `FixedSizeList` of `Float32`.
pub(crate) const ARROW_EXT_TYPE_VECTOR: &str = "Vector";
43
/// The integer and floating-point types that can appear inside
/// [`DataType::Number`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NumberDataType {
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    Int8,
    Int16,
    Int32,
    Int64,
    Float32,
    Float64,
}
57
/// Precision/scale pair attached to a decimal type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DecimalSize {
    /// First argument of a `Decimal(precision, scale)` type descriptor.
    pub precision: u8,
    /// Second argument of a `Decimal(precision, scale)` type descriptor.
    pub scale: u8,
}
63
/// Decimal type, split by storage width.
///
/// When built from a type descriptor, `Decimal128` is chosen for a precision
/// of at most 38 and `Decimal256` otherwise.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecimalDataType {
    Decimal128(DecimalSize),
    Decimal256(DecimalSize),
}
69
70impl DecimalDataType {
71 pub fn decimal_size(&self) -> &DecimalSize {
72 match self {
73 DecimalDataType::Decimal128(size) => size,
74 DecimalDataType::Decimal256(size) => size,
75 }
76 }
77}
78
/// Logical column type as understood by this driver.
#[derive(Debug, Clone)]
pub enum DataType {
    Null,
    /// Produced for an `Array(Nothing)` type descriptor.
    EmptyArray,
    /// Produced for a `Map(Nothing)` type descriptor.
    EmptyMap,
    Boolean,
    Binary,
    String,
    Number(NumberDataType),
    Decimal(DecimalDataType),
    Timestamp,
    Date,
    /// Wraps a type whose values may be NULL.
    Nullable(Box<DataType>),
    /// Array with the given element type.
    Array(Box<DataType>),
    /// Map type; the conversions in this file store a `Tuple` of
    /// `[key, value]` types as the inner type.
    Map(Box<DataType>),
    /// Tuple with one type per element.
    Tuple(Vec<DataType>),
    Variant,
    Bitmap,
    Geometry,
    Geography,
    Interval,
    /// Vector with the given dimension.
    Vector(u64),
}
103
104impl DataType {
105 pub fn is_numeric(&self) -> bool {
106 match self {
107 DataType::Number(_) | DataType::Decimal(_) => true,
108 DataType::Nullable(inner) => inner.is_numeric(),
109 _ => false,
110 }
111 }
112}
113
114impl std::fmt::Display for DataType {
115 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
116 match self {
117 DataType::Null => write!(f, "Null"),
118 DataType::EmptyArray => write!(f, "EmptyArray"),
119 DataType::EmptyMap => write!(f, "EmptyMap"),
120 DataType::Boolean => write!(f, "Boolean"),
121 DataType::Binary => write!(f, "Binary"),
122 DataType::String => write!(f, "String"),
123 DataType::Number(n) => match n {
124 NumberDataType::UInt8 => write!(f, "UInt8"),
125 NumberDataType::UInt16 => write!(f, "UInt16"),
126 NumberDataType::UInt32 => write!(f, "UInt32"),
127 NumberDataType::UInt64 => write!(f, "UInt64"),
128 NumberDataType::Int8 => write!(f, "Int8"),
129 NumberDataType::Int16 => write!(f, "Int16"),
130 NumberDataType::Int32 => write!(f, "Int32"),
131 NumberDataType::Int64 => write!(f, "Int64"),
132 NumberDataType::Float32 => write!(f, "Float32"),
133 NumberDataType::Float64 => write!(f, "Float64"),
134 },
135 DataType::Decimal(d) => {
136 let size = d.decimal_size();
137 write!(f, "Decimal({}, {})", size.precision, size.scale)
138 }
139 DataType::Timestamp => write!(f, "Timestamp"),
140 DataType::Date => write!(f, "Date"),
141 DataType::Nullable(inner) => write!(f, "Nullable({inner})"),
142 DataType::Array(inner) => write!(f, "Array({inner})"),
143 DataType::Map(inner) => match inner.as_ref() {
144 DataType::Tuple(tys) => {
145 write!(f, "Map({}, {})", tys[0], tys[1])
146 }
147 _ => unreachable!(),
148 },
149 DataType::Tuple(inner) => {
150 let inner = inner
151 .iter()
152 .map(|x| x.to_string())
153 .collect::<Vec<_>>()
154 .join(", ");
155 write!(f, "Tuple({inner})")
156 }
157 DataType::Variant => write!(f, "Variant"),
158 DataType::Bitmap => write!(f, "Bitmap"),
159 DataType::Geometry => write!(f, "Geometry"),
160 DataType::Geography => write!(f, "Geography"),
161 DataType::Interval => write!(f, "Interval"),
162 DataType::Vector(d) => write!(f, "Vector({d})"),
163 }
164 }
165}
166
/// A named column together with its [`DataType`].
#[derive(Debug, Clone)]
pub struct Field {
    /// Column name as reported by the source schema.
    pub name: String,
    /// Type of the column.
    pub data_type: DataType,
}
172
/// Ordered list of [`Field`]s; the default value is an empty schema.
#[derive(Debug, Clone, Default)]
pub struct Schema(Vec<Field>);

/// Shared, reference-counted handle to a [`Schema`].
pub type SchemaRef = Arc<Schema>;
177
178impl Schema {
179 pub fn fields(&self) -> &[Field] {
180 &self.0
181 }
182
183 pub fn from_vec(fields: Vec<Field>) -> Self {
184 Self(fields)
185 }
186}
187
188impl TryFrom<&TypeDesc<'_>> for DataType {
189 type Error = Error;
190
191 fn try_from(desc: &TypeDesc) -> Result<Self> {
192 if desc.nullable {
193 let mut desc = desc.clone();
194 desc.nullable = false;
195 let inner = DataType::try_from(&desc)?;
196 return Ok(DataType::Nullable(Box::new(inner)));
197 }
198 let dt = match desc.name {
199 "NULL" | "Null" => DataType::Null,
200 "Boolean" => DataType::Boolean,
201 "Binary" => DataType::Binary,
202 "String" => DataType::String,
203 "Int8" => DataType::Number(NumberDataType::Int8),
204 "Int16" => DataType::Number(NumberDataType::Int16),
205 "Int32" => DataType::Number(NumberDataType::Int32),
206 "Int64" => DataType::Number(NumberDataType::Int64),
207 "UInt8" => DataType::Number(NumberDataType::UInt8),
208 "UInt16" => DataType::Number(NumberDataType::UInt16),
209 "UInt32" => DataType::Number(NumberDataType::UInt32),
210 "UInt64" => DataType::Number(NumberDataType::UInt64),
211 "Float32" => DataType::Number(NumberDataType::Float32),
212 "Float64" => DataType::Number(NumberDataType::Float64),
213 "Decimal" => {
214 let precision = desc.args[0].name.parse::<u8>()?;
215 let scale = desc.args[1].name.parse::<u8>()?;
216
217 if precision <= 38 {
218 DataType::Decimal(DecimalDataType::Decimal128(DecimalSize {
219 precision,
220 scale,
221 }))
222 } else {
223 DataType::Decimal(DecimalDataType::Decimal256(DecimalSize {
224 precision,
225 scale,
226 }))
227 }
228 }
229 "Timestamp" => DataType::Timestamp,
230 "Date" => DataType::Date,
231 "Nullable" => {
232 if desc.args.len() != 1 {
233 return Err(Error::Parsing(
234 "Nullable type must have one argument".to_string(),
235 ));
236 }
237 let mut desc = desc.clone();
238 desc.nullable = false;
240 let inner = Self::try_from(&desc.args[0])?;
241 DataType::Nullable(Box::new(inner))
242 }
243 "Array" => {
244 if desc.args.len() != 1 {
245 return Err(Error::Parsing(
246 "Array type must have one argument".to_string(),
247 ));
248 }
249 if desc.args[0].name == "Nothing" {
250 DataType::EmptyArray
251 } else {
252 let inner = Self::try_from(&desc.args[0])?;
253 DataType::Array(Box::new(inner))
254 }
255 }
256 "Map" => {
257 if desc.args.len() == 1 && desc.args[0].name == "Nothing" {
258 DataType::EmptyMap
259 } else {
260 if desc.args.len() != 2 {
261 return Err(Error::Parsing(
262 "Map type must have two arguments".to_string(),
263 ));
264 }
265 let key_ty = Self::try_from(&desc.args[0])?;
266 let val_ty = Self::try_from(&desc.args[1])?;
267 DataType::Map(Box::new(DataType::Tuple(vec![key_ty, val_ty])))
268 }
269 }
270 "Tuple" => {
271 let mut inner = vec![];
272 for arg in &desc.args {
273 inner.push(Self::try_from(arg)?);
274 }
275 DataType::Tuple(inner)
276 }
277 "Variant" => DataType::Variant,
278 "Bitmap" => DataType::Bitmap,
279 "Geometry" => DataType::Geometry,
280 "Geography" => DataType::Geography,
281 "Interval" => DataType::Interval,
282 "Vector" => {
283 let dimension = desc.args[0].name.parse::<u64>()?;
284 DataType::Vector(dimension)
285 }
286 _ => return Err(Error::Parsing(format!("Unknown type: {desc:?}"))),
287 };
288 Ok(dt)
289 }
290}
291
292impl TryFrom<APISchemaField> for Field {
293 type Error = Error;
294
295 fn try_from(f: APISchemaField) -> Result<Self> {
296 let type_desc = parse_type_desc(&f.data_type)?;
297 let dt = DataType::try_from(&type_desc)?;
298 let field = Self {
299 name: f.name,
300 data_type: dt,
301 };
302 Ok(field)
303 }
304}
305
306impl TryFrom<Vec<APISchemaField>> for Schema {
307 type Error = Error;
308
309 fn try_from(fields: Vec<APISchemaField>) -> Result<Self> {
310 let fields = fields
311 .into_iter()
312 .map(Field::try_from)
313 .collect::<Result<Vec<_>>>()?;
314 Ok(Self(fields))
315 }
316}
317
/// Builds a [`DecimalSize`] from Arrow's `(precision, scale)` pair.
///
/// Arrow stores the scale as a signed byte; a negative scale cannot be
/// represented by [`DecimalSize`], so it is reported as a parsing error
/// instead of being wrapped into a large unsigned value.
#[cfg(feature = "flight-sql")]
fn arrow_decimal_size(precision: u8, scale: i8) -> Result<DecimalSize> {
    let scale = u8::try_from(scale).map_err(|_| {
        Error::Parsing(format!("Unsupported negative decimal scale: {scale}"))
    })?;
    Ok(DecimalSize { precision, scale })
}

#[cfg(feature = "flight-sql")]
impl TryFrom<&Arc<ArrowField>> for Field {
    type Error = Error;

    /// Converts an Arrow field into a driver [`Field`].
    ///
    /// When the field metadata contains [`EXTENSION_KEY`], the extension name
    /// decides the type; otherwise the Arrow data type is mapped directly.
    /// A nullable Arrow field is wrapped in `DataType::Nullable`, except when
    /// the resolved type is already `DataType::Null`.
    ///
    /// # Errors
    /// Returns [`Error::Parsing`] for unsupported Arrow or extension types, a
    /// negative decimal scale, or a negative vector dimension.
    fn try_from(f: &Arc<ArrowField>) -> Result<Self> {
        let mut dt = if let Some(extend_type) = f.metadata().get(EXTENSION_KEY) {
            match extend_type.as_str() {
                ARROW_EXT_TYPE_EMPTY_ARRAY => DataType::EmptyArray,
                ARROW_EXT_TYPE_EMPTY_MAP => DataType::EmptyMap,
                ARROW_EXT_TYPE_VARIANT => DataType::Variant,
                ARROW_EXT_TYPE_BITMAP => DataType::Bitmap,
                ARROW_EXT_TYPE_GEOMETRY => DataType::Geometry,
                ARROW_EXT_TYPE_GEOGRAPHY => DataType::Geography,
                ARROW_EXT_TYPE_INTERVAL => DataType::Interval,
                ARROW_EXT_TYPE_VECTOR => match f.data_type() {
                    ArrowDataType::FixedSizeList(field, dimension) => {
                        // Only Float32 element lists are accepted as vectors.
                        if !matches!(field.data_type(), ArrowDataType::Float32) {
                            return Err(Error::Parsing(format!(
                                "Unsupported FixedSizeList Arrow type: {:?}",
                                field.data_type()
                            )));
                        }
                        // The Arrow list size is signed; reject negatives
                        // rather than wrapping them.
                        let dimension = u64::try_from(*dimension).map_err(|_| {
                            Error::Parsing(format!("Invalid Vector dimension: {dimension}"))
                        })?;
                        DataType::Vector(dimension)
                    }
                    arrow_type => {
                        return Err(Error::Parsing(format!(
                            "Unsupported Arrow type: {arrow_type:?}",
                        )));
                    }
                },
                _ => {
                    return Err(Error::Parsing(format!(
                        "Unsupported extension datatype for arrow field: {f:?}"
                    )))
                }
            }
        } else {
            match f.data_type() {
                ArrowDataType::Null => DataType::Null,
                ArrowDataType::Boolean => DataType::Boolean,
                ArrowDataType::Int8 => DataType::Number(NumberDataType::Int8),
                ArrowDataType::Int16 => DataType::Number(NumberDataType::Int16),
                ArrowDataType::Int32 => DataType::Number(NumberDataType::Int32),
                ArrowDataType::Int64 => DataType::Number(NumberDataType::Int64),
                ArrowDataType::UInt8 => DataType::Number(NumberDataType::UInt8),
                ArrowDataType::UInt16 => DataType::Number(NumberDataType::UInt16),
                ArrowDataType::UInt32 => DataType::Number(NumberDataType::UInt32),
                ArrowDataType::UInt64 => DataType::Number(NumberDataType::UInt64),
                ArrowDataType::Float32 => DataType::Number(NumberDataType::Float32),
                ArrowDataType::Float64 => DataType::Number(NumberDataType::Float64),
                ArrowDataType::Binary
                | ArrowDataType::LargeBinary
                | ArrowDataType::FixedSizeBinary(_) => DataType::Binary,
                ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View => {
                    DataType::String
                }
                ArrowDataType::Timestamp(_, _) => DataType::Timestamp,
                ArrowDataType::Date32 => DataType::Date,
                ArrowDataType::Decimal128(p, s) => {
                    DataType::Decimal(DecimalDataType::Decimal128(arrow_decimal_size(*p, *s)?))
                }
                ArrowDataType::Decimal256(p, s) => {
                    DataType::Decimal(DecimalDataType::Decimal256(arrow_decimal_size(*p, *s)?))
                }
                ArrowDataType::List(item) | ArrowDataType::LargeList(item) => {
                    let item_ty = Field::try_from(item)?.data_type;
                    DataType::Array(Box::new(item_ty))
                }
                ArrowDataType::Map(entries, _) => {
                    // The entries field is converted like any other field
                    // and its type becomes the map's inner type.
                    let entries_ty = Field::try_from(entries)?.data_type;
                    DataType::Map(Box::new(entries_ty))
                }
                ArrowDataType::Struct(children) => {
                    let mut inner_tys = Vec::with_capacity(children.len());
                    for child in children {
                        inner_tys.push(Field::try_from(child)?.data_type);
                    }
                    DataType::Tuple(inner_tys)
                }
                _ => {
                    return Err(Error::Parsing(format!(
                        "Unsupported datatype for arrow field: {f:?}"
                    )))
                }
            }
        };
        if f.is_nullable() && !matches!(dt, DataType::Null) {
            dt = DataType::Nullable(Box::new(dt));
        }
        Ok(Field {
            name: f.name().to_string(),
            data_type: dt,
        })
    }
}
426
#[cfg(feature = "flight-sql")]
impl TryFrom<ArrowSchemaRef> for Schema {
    type Error = Error;

    /// Converts each Arrow field of the schema in order, returning the first
    /// conversion error encountered.
    fn try_from(schema_ref: ArrowSchemaRef) -> Result<Self> {
        schema_ref
            .fields()
            .iter()
            .map(Field::try_from)
            .collect::<Result<Vec<Field>>>()
            .map(Schema)
    }
}
440
/// Parsed form of a textual type such as `Map(String, Int64 NULL)`, borrowing
/// its names from the input string.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TypeDesc<'t> {
    /// Type name, or the literal text of an argument such as `42`.
    name: &'t str,
    /// Set when the type is followed by a trailing `NULL` token.
    nullable: bool,
    /// Parenthesized arguments, each parsed recursively.
    args: Vec<TypeDesc<'t>>,
}
447
448fn parse_type_desc(s: &str) -> Result<TypeDesc<'_>> {
449 let mut name = "";
450 let mut args = vec![];
451 let mut depth = 0;
452 let mut start = 0;
453 let mut nullable = false;
454 for (i, c) in s.char_indices() {
455 match c {
456 '(' => {
457 if depth == 0 {
458 name = &s[start..i];
459 start = i + 1;
460 }
461 depth += 1;
462 }
463 ')' => {
464 depth -= 1;
465 if depth == 0 {
466 let s = &s[start..i];
467 if !s.is_empty() {
468 args.push(parse_type_desc(s)?);
469 }
470 start = i + 1;
471 }
472 }
473 ',' => {
474 if depth == 1 {
475 let s = &s[start..i];
476 args.push(parse_type_desc(s)?);
477 start = i + 1;
478 }
479 }
480 ' ' => {
481 if depth == 0 {
482 let s = &s[start..i];
483 if !s.is_empty() {
484 name = s;
485 }
486 start = i + 1;
487 }
488 }
489 _ => {}
490 }
491 }
492 if depth != 0 {
493 return Err(Error::Parsing(format!("Invalid type desc: {s}")));
494 }
495 if start < s.len() {
496 let s = &s[start..];
497 if !s.is_empty() {
498 if name.is_empty() {
499 name = s;
500 } else if s == "NULL" {
501 nullable = true;
502 } else {
503 return Err(Error::Parsing(format!("Invalid type arg for {name}: {s}")));
504 }
505 }
506 }
507 Ok(TypeDesc {
508 name,
509 nullable,
510 args,
511 })
512}
513
#[cfg(test)]
mod test {
    use std::vec;

    use super::*;

    /// Builds an expected descriptor node.
    fn node<'t>(name: &'t str, nullable: bool, args: Vec<TypeDesc<'t>>) -> TypeDesc<'t> {
        TypeDesc {
            name,
            nullable,
            args,
        }
    }

    /// Expected descriptor for a non-nullable type without arguments.
    fn leaf(name: &str) -> TypeDesc<'_> {
        node(name, false, vec![])
    }

    /// Expected descriptor for a nullable type without arguments.
    fn nullable_leaf(name: &str) -> TypeDesc<'_> {
        node(name, true, vec![])
    }

    /// Parses every `(description, input, expected)` case and compares the
    /// result with the expected tree.
    fn assert_parses<'t>(cases: Vec<(&str, &'t str, TypeDesc<'t>)>) {
        for (desc, input, expected) in cases {
            let output = parse_type_desc(input).unwrap();
            assert_eq!(output, expected, "{}", desc);
        }
    }

    #[test]
    fn test_parse_type_desc() {
        assert_parses(vec![
            ("plain type", "String", leaf("String")),
            (
                "decimal type",
                "Decimal(42, 42)",
                node("Decimal", false, vec![leaf("42"), leaf("42")]),
            ),
            (
                "nullable type",
                "Nullable(Nothing)",
                node("Nullable", false, vec![leaf("Nothing")]),
            ),
            ("empty arg", "DateTime()", leaf("DateTime")),
            (
                "numeric arg",
                "FixedString(42)",
                node("FixedString", false, vec![leaf("42")]),
            ),
            (
                "multiple args",
                "Array(Tuple(Tuple(String, String), Tuple(String, UInt64)))",
                node(
                    "Array",
                    false,
                    vec![node(
                        "Tuple",
                        false,
                        vec![
                            node("Tuple", false, vec![leaf("String"), leaf("String")]),
                            node("Tuple", false, vec![leaf("String"), leaf("UInt64")]),
                        ],
                    )],
                ),
            ),
            (
                "map args",
                "Map(String, Array(Int64))",
                node(
                    "Map",
                    false,
                    vec![
                        leaf("String"),
                        node("Array", false, vec![leaf("Int64")]),
                    ],
                ),
            ),
            (
                "map nullable value args",
                "Nullable(Map(String, String NULL))",
                node(
                    "Nullable",
                    false,
                    vec![node(
                        "Map",
                        false,
                        vec![leaf("String"), nullable_leaf("String")],
                    )],
                ),
            ),
        ]);
    }

    #[test]
    fn test_parse_complex_type_with_null() {
        assert_parses(vec![(
            "complex nullable type",
            "Nullable(Tuple(String NULL, Array(Tuple(Array(Int32 NULL) NULL, Array(String NULL) NULL) NULL) NULL))",
            node(
                "Nullable",
                false,
                vec![node(
                    "Tuple",
                    false,
                    vec![
                        nullable_leaf("String"),
                        node(
                            "Array",
                            true,
                            vec![node(
                                "Tuple",
                                true,
                                vec![
                                    node("Array", true, vec![nullable_leaf("Int32")]),
                                    node("Array", true, vec![nullable_leaf("String")]),
                                ],
                            )],
                        ),
                    ],
                )],
            ),
        )]);
    }
}