1use crate::error::TypesError;
2use std::collections::HashMap;
3use std::fmt::{Display, Formatter};
4
5#[derive(Debug, Clone, PartialEq)]
8pub struct Column {
9 pub name: String,
11 pub data_type: DataTypeNode,
13}
14
15impl Column {
16 #[allow(missing_docs)]
17 pub fn new(name: String, data_type: DataTypeNode) -> Self {
18 Self { name, data_type }
19 }
20}
21
22impl Display for Column {
23 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
24 write!(f, "{}: {}", self.name, self.data_type)
25 }
26}
27
28#[derive(Debug, Clone, PartialEq)]
31#[non_exhaustive]
32#[allow(missing_docs)]
33pub enum DataTypeNode {
34 Bool,
35
36 UInt8,
37 UInt16,
38 UInt32,
39 UInt64,
40 UInt128,
41 UInt256,
42
43 Int8,
44 Int16,
45 Int32,
46 Int64,
47 Int128,
48 Int256,
49
50 Float32,
51 Float64,
52 BFloat16,
53
54 Decimal(u8, u8, DecimalType),
56
57 String,
58 FixedString(usize),
59 UUID,
60
61 Date,
62 Date32,
63
64 DateTime(Option<String>),
66 DateTime64(DateTimePrecision, Option<String>),
68
69 Time,
71 Time64(DateTimePrecision),
73
74 Interval(IntervalType),
75
76 IPv4,
77 IPv6,
78
79 Nullable(Box<DataTypeNode>),
80 LowCardinality(Box<DataTypeNode>),
81
82 Array(Box<DataTypeNode>),
83 Tuple(Vec<DataTypeNode>),
84 Enum(EnumType, HashMap<i16, String>),
85
86 Map([Box<DataTypeNode>; 2]),
88
89 AggregateFunction(String, Vec<DataTypeNode>),
91
92 Variant(Vec<DataTypeNode>),
94
95 Dynamic,
96 JSON,
97
98 Point,
99 Ring,
100 LineString,
101 MultiLineString,
102 Polygon,
103 MultiPolygon,
104}
105
106impl DataTypeNode {
107 pub fn new(name: &str) -> Result<Self, TypesError> {
111 match name {
112 "UInt8" => Ok(Self::UInt8),
113 "UInt16" => Ok(Self::UInt16),
114 "UInt32" => Ok(Self::UInt32),
115 "UInt64" => Ok(Self::UInt64),
116 "UInt128" => Ok(Self::UInt128),
117 "UInt256" => Ok(Self::UInt256),
118 "Int8" => Ok(Self::Int8),
119 "Int16" => Ok(Self::Int16),
120 "Int32" => Ok(Self::Int32),
121 "Int64" => Ok(Self::Int64),
122 "Int128" => Ok(Self::Int128),
123 "Int256" => Ok(Self::Int256),
124 "Float32" => Ok(Self::Float32),
125 "Float64" => Ok(Self::Float64),
126 "BFloat16" => Ok(Self::BFloat16),
127 "String" => Ok(Self::String),
128 "UUID" => Ok(Self::UUID),
129 "Date" => Ok(Self::Date),
130 "Date32" => Ok(Self::Date32),
131 "IPv4" => Ok(Self::IPv4),
132 "IPv6" => Ok(Self::IPv6),
133 "Bool" => Ok(Self::Bool),
134 "Dynamic" => Ok(Self::Dynamic),
135 "JSON" => Ok(Self::JSON),
136 "Point" => Ok(Self::Point),
137 "Ring" => Ok(Self::Ring),
138 "LineString" => Ok(Self::LineString),
139 "MultiLineString" => Ok(Self::MultiLineString),
140 "Polygon" => Ok(Self::Polygon),
141 "MultiPolygon" => Ok(Self::MultiPolygon),
142
143 str if str.starts_with("Decimal") => parse_decimal(str),
144 str if str.starts_with("DateTime64") => parse_datetime64(str),
145 str if str.starts_with("DateTime") => parse_datetime(str),
146 str if str.starts_with("Time64") => parse_time64(str),
147 str if str.starts_with("Time") => Ok(Self::Time),
148 str if str.starts_with("Interval") => Ok(Self::Interval(str[8..].parse()?)),
149
150 str if str.starts_with("Nullable") => parse_nullable(str),
151 str if str.starts_with("LowCardinality") => parse_low_cardinality(str),
152 str if str.starts_with("FixedString") => parse_fixed_string(str),
153
154 str if str.starts_with("Array") => parse_array(str),
155 str if str.starts_with("Enum") => parse_enum(str),
156 str if str.starts_with("Map") => parse_map(str),
157 str if str.starts_with("Tuple") => parse_tuple(str),
158 str if str.starts_with("Variant") => parse_variant(str),
159
160 str => Err(TypesError::TypeParsingError(format!(
162 "Unknown data type: {str}"
163 ))),
164 }
165 }
166
167 pub fn remove_low_cardinality(&self) -> &DataTypeNode {
169 match self {
170 DataTypeNode::LowCardinality(inner) => inner,
171 _ => self,
172 }
173 }
174}
175
176impl From<DataTypeNode> for String {
177 fn from(value: DataTypeNode) -> Self {
178 value.to_string()
179 }
180}
181
182impl Display for DataTypeNode {
183 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
184 use DataTypeNode::*;
185 match self {
186 UInt8 => write!(f, "UInt8"),
187 UInt16 => write!(f, "UInt16"),
188 UInt32 => write!(f, "UInt32"),
189 UInt64 => write!(f, "UInt64"),
190 UInt128 => write!(f, "UInt128"),
191 UInt256 => write!(f, "UInt256"),
192 Int8 => write!(f, "Int8"),
193 Int16 => write!(f, "Int16"),
194 Int32 => write!(f, "Int32"),
195 Int64 => write!(f, "Int64"),
196 Int128 => write!(f, "Int128"),
197 Int256 => write!(f, "Int256"),
198 Float32 => write!(f, "Float32"),
199 Float64 => write!(f, "Float64"),
200 BFloat16 => write!(f, "BFloat16"),
201 Decimal(precision, scale, _) => {
202 write!(f, "Decimal({precision}, {scale})")
203 }
204 String => write!(f, "String"),
205 UUID => write!(f, "UUID"),
206 Date => write!(f, "Date"),
207 Date32 => write!(f, "Date32"),
208 DateTime(None) => write!(f, "DateTime"),
209 DateTime(Some(tz)) => write!(f, "DateTime('{tz}')"),
210 DateTime64(precision, None) => write!(f, "DateTime64({precision})"),
211 DateTime64(precision, Some(tz)) => write!(f, "DateTime64({precision}, '{tz}')"),
212 Time => write!(f, "Time"),
213 Time64(precision) => write!(f, "Time64({precision})"),
214 Interval(interval) => write!(f, "Interval{interval}"),
215 IPv4 => write!(f, "IPv4"),
216 IPv6 => write!(f, "IPv6"),
217 Bool => write!(f, "Bool"),
218 Nullable(inner) => write!(f, "Nullable({inner})"),
219 Array(inner) => write!(f, "Array({inner})"),
220 Tuple(elements) => {
221 write!(f, "Tuple(")?;
222 for (i, element) in elements.iter().enumerate() {
223 if i > 0 {
224 write!(f, ", ")?;
225 }
226 write!(f, "{element}")?;
227 }
228 write!(f, ")")
229 }
230 Map([key, value]) => {
231 write!(f, "Map({key}, {value})")
232 }
233 LowCardinality(inner) => {
234 write!(f, "LowCardinality({inner})")
235 }
236 Enum(enum_type, values) => {
237 let mut values_vec = values.iter().collect::<Vec<_>>();
238 values_vec.sort_by(|(i1, _), (i2, _)| (*i1).cmp(*i2));
239 write!(f, "{enum_type}(")?;
240 for (i, (index, name)) in values_vec.iter().enumerate() {
241 if i > 0 {
242 write!(f, ", ")?;
243 }
244 write!(f, "'{name}' = {index}")?;
245 }
246 write!(f, ")")
247 }
248 AggregateFunction(func_name, args) => {
249 write!(f, "AggregateFunction({func_name}, ")?;
250 for (i, element) in args.iter().enumerate() {
251 if i > 0 {
252 write!(f, ", ")?;
253 }
254 write!(f, "{element}")?;
255 }
256 write!(f, ")")
257 }
258 FixedString(size) => {
259 write!(f, "FixedString({size})")
260 }
261 Variant(types) => {
262 write!(f, "Variant(")?;
263 for (i, element) in types.iter().enumerate() {
264 if i > 0 {
265 write!(f, ", ")?;
266 }
267 write!(f, "{element}")?;
268 }
269 write!(f, ")")
270 }
271 JSON => write!(f, "JSON"),
272 Dynamic => write!(f, "Dynamic"),
273 Point => write!(f, "Point"),
274 Ring => write!(f, "Ring"),
275 LineString => write!(f, "LineString"),
276 MultiLineString => write!(f, "MultiLineString"),
277 Polygon => write!(f, "Polygon"),
278 MultiPolygon => write!(f, "MultiPolygon"),
279 }
280 }
281}
282
/// The flavor of an `Enum` data type.
///
/// `parse_enum` selects the variant from the `Enum8` / `Enum16` prefix of the
/// type name, and `Display` renders it back as that prefix.
///
/// The type is fieldless, so it also derives `Eq` and `Hash`; this allows it
/// to be used in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum EnumType {
    /// Rendered as `Enum8`.
    Enum8,
    /// Rendered as `Enum16`.
    Enum16,
}
291
292impl Display for EnumType {
293 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
294 match self {
295 EnumType::Enum8 => write!(f, "Enum8"),
296 EnumType::Enum16 => write!(f, "Enum16"),
297 }
298 }
299}
300
/// Precision of a `DateTime64` / `Time64` type: a single decimal digit, `0`
/// through `9`, with one variant per digit.
///
/// `DateTimePrecision::new` maps a digit character to a variant and `Display`
/// writes the digit back.
///
/// The type is fieldless, so it also derives `Eq` and `Hash`; this allows it
/// to be used in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[allow(missing_docs)]
pub enum DateTimePrecision {
    Precision0,
    Precision1,
    Precision2,
    Precision3,
    Precision4,
    Precision5,
    Precision6,
    Precision7,
    Precision8,
    Precision9,
}
318
319impl DateTimePrecision {
320 pub(crate) fn new(char: char) -> Result<DateTimePrecision, TypesError> {
321 match char {
322 '0' => Ok(DateTimePrecision::Precision0),
323 '1' => Ok(DateTimePrecision::Precision1),
324 '2' => Ok(DateTimePrecision::Precision2),
325 '3' => Ok(DateTimePrecision::Precision3),
326 '4' => Ok(DateTimePrecision::Precision4),
327 '5' => Ok(DateTimePrecision::Precision5),
328 '6' => Ok(DateTimePrecision::Precision6),
329 '7' => Ok(DateTimePrecision::Precision7),
330 '8' => Ok(DateTimePrecision::Precision8),
331 '9' => Ok(DateTimePrecision::Precision9),
332 _ => Err(TypesError::TypeParsingError(format!(
333 "Invalid DateTime64 precision, expected to be within [0, 9] interval, got {char}"
334 ))),
335 }
336 }
337}
338
/// Size class of a `Decimal` type, chosen from its precision by
/// `DecimalType::new`.
///
/// The type is fieldless, so it also derives `Eq` and `Hash`; this allows it
/// to be used in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum DecimalType {
    /// Selected for precision up to 9.
    Decimal32,
    /// Selected for precision from 10 to 18.
    Decimal64,
    /// Selected for precision from 19 to 38.
    Decimal128,
    /// Selected for precision from 39 to 76.
    Decimal256,
}
352
353impl Display for DecimalType {
354 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
355 match self {
356 DecimalType::Decimal32 => write!(f, "Decimal32"),
357 DecimalType::Decimal64 => write!(f, "Decimal64"),
358 DecimalType::Decimal128 => write!(f, "Decimal128"),
359 DecimalType::Decimal256 => write!(f, "Decimal256"),
360 }
361 }
362}
363
364impl DecimalType {
365 pub(crate) fn new(precision: u8) -> Result<Self, TypesError> {
366 if precision <= 9 {
367 Ok(DecimalType::Decimal32)
368 } else if precision <= 18 {
369 Ok(DecimalType::Decimal64)
370 } else if precision <= 38 {
371 Ok(DecimalType::Decimal128)
372 } else if precision <= 76 {
373 Ok(DecimalType::Decimal256)
374 } else {
375 Err(TypesError::TypeParsingError(format!(
376 "Invalid Decimal precision: {precision}"
377 )))
378 }
379 }
380}
381
382impl Display for DateTimePrecision {
383 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
384 match self {
385 DateTimePrecision::Precision0 => write!(f, "0"),
386 DateTimePrecision::Precision1 => write!(f, "1"),
387 DateTimePrecision::Precision2 => write!(f, "2"),
388 DateTimePrecision::Precision3 => write!(f, "3"),
389 DateTimePrecision::Precision4 => write!(f, "4"),
390 DateTimePrecision::Precision5 => write!(f, "5"),
391 DateTimePrecision::Precision6 => write!(f, "6"),
392 DateTimePrecision::Precision7 => write!(f, "7"),
393 DateTimePrecision::Precision8 => write!(f, "8"),
394 DateTimePrecision::Precision9 => write!(f, "9"),
395 }
396 }
397}
398
/// Unit of an `Interval` data type.
///
/// Each variant is parsed from (via `FromStr`) and rendered as (via `Display`)
/// its own name, e.g. `Nanosecond`.
///
/// The type is fieldless, so it also derives `Eq` and `Hash`; this allows it
/// to be used in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[allow(missing_docs)]
pub enum IntervalType {
    Nanosecond,
    Microsecond,
    Millisecond,
    Second,
    Minute,
    Hour,
    Day,
    Week,
    Month,
    Quarter,
    Year,
}
416
417impl std::str::FromStr for IntervalType {
418 type Err = TypesError;
419
420 fn from_str(s: &str) -> Result<Self, Self::Err> {
421 match s {
422 "Nanosecond" => Ok(IntervalType::Nanosecond),
423 "Microsecond" => Ok(IntervalType::Microsecond),
424 "Millisecond" => Ok(IntervalType::Millisecond),
425 "Second" => Ok(IntervalType::Second),
426 "Minute" => Ok(IntervalType::Minute),
427 "Hour" => Ok(IntervalType::Hour),
428 "Day" => Ok(IntervalType::Day),
429 "Week" => Ok(IntervalType::Week),
430 "Month" => Ok(IntervalType::Month),
431 "Quarter" => Ok(IntervalType::Quarter),
432 "Year" => Ok(IntervalType::Year),
433 _ => Err(TypesError::TypeParsingError(format!(
434 "Unknown interval type: {s}"
435 ))),
436 }
437 }
438}
439
440impl Display for IntervalType {
441 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
442 match self {
443 Self::Nanosecond => write!(f, "Nanosecond"),
444 Self::Microsecond => write!(f, "Microsecond"),
445 Self::Millisecond => write!(f, "Millisecond"),
446 Self::Second => write!(f, "Second"),
447 Self::Minute => write!(f, "Minute"),
448 Self::Hour => write!(f, "Hour"),
449 Self::Day => write!(f, "Day"),
450 Self::Week => write!(f, "Week"),
451 Self::Month => write!(f, "Month"),
452 Self::Quarter => write!(f, "Quarter"),
453 Self::Year => write!(f, "Year"),
454 }
455 }
456}
457
458fn parse_fixed_string(input: &str) -> Result<DataTypeNode, TypesError> {
459 if input.len() >= 14 {
460 let size_str = &input[12..input.len() - 1];
461 let size = size_str.parse::<usize>().map_err(|err| {
462 TypesError::TypeParsingError(format!(
463 "Invalid FixedString size, expected a valid number. Underlying error: {err}, input: {input}, size_str: {size_str}"
464 ))
465 })?;
466 if size == 0 {
467 return Err(TypesError::TypeParsingError(format!(
468 "Invalid FixedString size, expected a positive number, got zero. Input: {input}"
469 )));
470 }
471 return Ok(DataTypeNode::FixedString(size));
472 }
473 Err(TypesError::TypeParsingError(format!(
474 "Invalid FixedString format, expected FixedString(N), got {input}"
475 )))
476}
477
478fn parse_array(input: &str) -> Result<DataTypeNode, TypesError> {
479 if input.len() >= 8 {
480 let inner_type_str = &input[6..input.len() - 1];
481 let inner_type = DataTypeNode::new(inner_type_str)?;
482 return Ok(DataTypeNode::Array(Box::new(inner_type)));
483 }
484 Err(TypesError::TypeParsingError(format!(
485 "Invalid Array format, expected Array(InnerType), got {input}"
486 )))
487}
488
489fn parse_enum(input: &str) -> Result<DataTypeNode, TypesError> {
490 if input.len() >= 9 {
491 let (enum_type, prefix_len) = if input.starts_with("Enum8") {
492 (EnumType::Enum8, 6)
493 } else if input.starts_with("Enum16") {
494 (EnumType::Enum16, 7)
495 } else {
496 return Err(TypesError::TypeParsingError(format!(
497 "Invalid Enum type, expected Enum8 or Enum16, got {input}"
498 )));
499 };
500 let enum_values_map_str = &input[prefix_len..input.len() - 1];
501 let enum_values_map = parse_enum_values_map(enum_values_map_str)?;
502 return Ok(DataTypeNode::Enum(enum_type, enum_values_map));
503 }
504 Err(TypesError::TypeParsingError(format!(
505 "Invalid Enum format, expected Enum8('name' = value), got {input}"
506 )))
507}
508
509fn parse_datetime(input: &str) -> Result<DataTypeNode, TypesError> {
510 if input == "DateTime" {
511 return Ok(DataTypeNode::DateTime(None));
512 }
513 if input.len() >= 12 {
514 let timezone = input[10..input.len() - 2].to_string();
515 return Ok(DataTypeNode::DateTime(Some(timezone)));
516 }
517 Err(TypesError::TypeParsingError(format!(
518 "Invalid DateTime format, expected DateTime('timezone'), got {input}"
519 )))
520}
521
522fn parse_decimal(input: &str) -> Result<DataTypeNode, TypesError> {
523 if input.len() >= 10 {
524 let precision_and_scale_str = input[8..input.len() - 1].split(", ").collect::<Vec<_>>();
525 if precision_and_scale_str.len() != 2 {
526 return Err(TypesError::TypeParsingError(format!(
527 "Invalid Decimal format, expected Decimal(P, S), got {input}"
528 )));
529 }
530 let parsed = precision_and_scale_str
531 .iter()
532 .map(|s| s.parse::<u8>())
533 .collect::<Result<Vec<_>, _>>()
534 .map_err(|err| {
535 TypesError::TypeParsingError(format!(
536 "Invalid Decimal format, expected Decimal(P, S), got {input}. Underlying error: {err}"
537 ))
538 })?;
539 let precision = parsed[0];
540 let scale = parsed[1];
541 if scale < 1 || precision < 1 {
542 return Err(TypesError::TypeParsingError(format!(
543 "Invalid Decimal format, expected Decimal(P, S) with P > 0 and S > 0, got {input}"
544 )));
545 }
546 if precision < scale {
547 return Err(TypesError::TypeParsingError(format!(
548 "Invalid Decimal format, expected Decimal(P, S) with P >= S, got {input}"
549 )));
550 }
551 let size = DecimalType::new(parsed[0])?;
552 return Ok(DataTypeNode::Decimal(precision, scale, size));
553 }
554 Err(TypesError::TypeParsingError(format!(
555 "Invalid Decimal format, expected Decimal(P), got {input}"
556 )))
557}
558
559fn parse_datetime64(input: &str) -> Result<DataTypeNode, TypesError> {
560 if input.len() >= 13 {
561 let mut chars = input[11..input.len() - 1].chars();
562 let precision_char = chars.next().ok_or(TypesError::TypeParsingError(format!(
563 "Invalid DateTime64 precision, expected a positive number. Input: {input}"
564 )))?;
565 let precision = DateTimePrecision::new(precision_char)?;
566 let maybe_tz = match chars.as_str() {
567 str if str.len() > 2 => Some(str[3..str.len() - 1].to_string()),
568 _ => None,
569 };
570 return Ok(DataTypeNode::DateTime64(precision, maybe_tz));
571 }
572 Err(TypesError::TypeParsingError(format!(
573 "Invalid DateTime format, expected DateTime('timezone'), got {input}"
574 )))
575}
576
577fn parse_time64(input: &str) -> Result<DataTypeNode, TypesError> {
578 if input.len() >= 8 {
579 let mut chars = input[7..input.len() - 1].chars();
580 let precision_char = chars.next().ok_or(TypesError::TypeParsingError(format!(
581 "Invalid Time64 precision, expected a positive number. Input: {input}"
582 )))?;
583 let precision = DateTimePrecision::new(precision_char)?;
584
585 return Ok(DataTypeNode::Time64(precision));
586 }
587 Err(TypesError::TypeParsingError(format!(
588 "Invalid Time64 format, expected Time64(precision, 'timezone'), got {input}"
589 )))
590}
591
592fn parse_low_cardinality(input: &str) -> Result<DataTypeNode, TypesError> {
593 if input.len() >= 16 {
594 let inner_type_str = &input[15..input.len() - 1];
595 let inner_type = DataTypeNode::new(inner_type_str)?;
596 return Ok(DataTypeNode::LowCardinality(Box::new(inner_type)));
597 }
598 Err(TypesError::TypeParsingError(format!(
599 "Invalid LowCardinality format, expected LowCardinality(InnerType), got {input}"
600 )))
601}
602
603fn parse_nullable(input: &str) -> Result<DataTypeNode, TypesError> {
604 if input.len() >= 10 {
605 let inner_type_str = &input[9..input.len() - 1];
606 let inner_type = DataTypeNode::new(inner_type_str)?;
607 return Ok(DataTypeNode::Nullable(Box::new(inner_type)));
608 }
609 Err(TypesError::TypeParsingError(format!(
610 "Invalid Nullable format, expected Nullable(InnerType), got {input}"
611 )))
612}
613
614fn parse_map(input: &str) -> Result<DataTypeNode, TypesError> {
615 if input.len() >= 5 {
616 let inner_types_str = &input[4..input.len() - 1];
617 let inner_types = parse_inner_types(inner_types_str)?;
618 if inner_types.len() != 2 {
619 return Err(TypesError::TypeParsingError(format!(
620 "Expected two inner elements in a Map from input {input}"
621 )));
622 }
623 return Ok(DataTypeNode::Map([
624 Box::new(inner_types[0].clone()),
625 Box::new(inner_types[1].clone()),
626 ]));
627 }
628 Err(TypesError::TypeParsingError(format!(
629 "Invalid Map format, expected Map(KeyType, ValueType), got {input}"
630 )))
631}
632
633fn parse_tuple(input: &str) -> Result<DataTypeNode, TypesError> {
634 if input.len() > 7 {
635 let inner_types_str = &input[6..input.len() - 1];
636 let inner_types = parse_inner_types(inner_types_str)?;
637 if inner_types.is_empty() {
638 return Err(TypesError::TypeParsingError(format!(
639 "Expected at least one inner element in a Tuple from input {input}"
640 )));
641 }
642 return Ok(DataTypeNode::Tuple(inner_types));
643 }
644 Err(TypesError::TypeParsingError(format!(
645 "Invalid Tuple format, expected Tuple(Type1, Type2, ...), got {input}"
646 )))
647}
648
649fn parse_variant(input: &str) -> Result<DataTypeNode, TypesError> {
650 if input.len() >= 9 {
651 let inner_types_str = &input[8..input.len() - 1];
652 let inner_types = parse_inner_types(inner_types_str)?;
653 return Ok(DataTypeNode::Variant(inner_types));
654 }
655 Err(TypesError::TypeParsingError(format!(
656 "Invalid Variant format, expected Variant(Type1, Type2, ...), got {input}"
657 )))
658}
659
660fn parse_inner_types(input: &str) -> Result<Vec<DataTypeNode>, TypesError> {
667 let mut inner_types: Vec<DataTypeNode> = Vec::new();
668
669 let input_bytes = input.as_bytes();
670
671 let mut open_parens = 0;
672 let mut quote_open = false;
673 let mut char_escaped = false;
674 let mut last_element_index = 0;
675
676 let mut i = 0;
677 while i < input_bytes.len() {
678 if char_escaped {
679 char_escaped = false;
680 } else if input_bytes[i] == b'\\' {
681 char_escaped = true;
682 } else if input_bytes[i] == b'\'' {
683 quote_open = !quote_open; } else if !quote_open {
685 if input_bytes[i] == b'(' {
686 open_parens += 1;
687 } else if input_bytes[i] == b')' {
688 open_parens -= 1;
689 } else if input_bytes[i] == b',' && open_parens == 0 {
690 let data_type_str = String::from_utf8(input_bytes[last_element_index..i].to_vec())
691 .map_err(|_| {
692 TypesError::TypeParsingError(format!(
693 "Invalid UTF-8 sequence in input for the inner data type: {}",
694 &input[last_element_index..]
695 ))
696 })?;
697 let data_type = DataTypeNode::new(&data_type_str)?;
698 inner_types.push(data_type);
699 if i + 2 <= input_bytes.len() && input_bytes[i + 1] == b' ' {
701 i += 2;
702 } else {
703 i += 1;
704 }
705 last_element_index = i;
706 continue; }
708 }
709 i += 1;
710 }
711
712 if open_parens == 0 && last_element_index < input_bytes.len() {
714 let data_type_str =
715 String::from_utf8(input_bytes[last_element_index..].to_vec()).map_err(|_| {
716 TypesError::TypeParsingError(format!(
717 "Invalid UTF-8 sequence in input for the inner data type: {}",
718 &input[last_element_index..]
719 ))
720 })?;
721 let data_type = DataTypeNode::new(&data_type_str)?;
722 inner_types.push(data_type);
723 }
724
725 Ok(inner_types)
726}
727
728#[inline]
729fn parse_enum_index(input_bytes: &[u8], input: &str) -> Result<i16, TypesError> {
730 String::from_utf8(input_bytes.to_vec())
731 .map_err(|_| {
732 TypesError::TypeParsingError(format!(
733 "Invalid UTF-8 sequence in input for the enum index: {}",
734 &input
735 ))
736 })?
737 .parse::<i16>()
738 .map_err(|_| {
739 TypesError::TypeParsingError(format!(
740 "Invalid Enum index, expected a valid number. Input: {input}"
741 ))
742 })
743}
744
745fn parse_enum_values_map(input: &str) -> Result<HashMap<i16, String>, TypesError> {
746 let mut names: Vec<String> = Vec::new();
747 let mut indices: Vec<i16> = Vec::new();
748 let mut parsing_name = true; let mut char_escaped = false; let mut start_index = 1; let mut i = 1;
753 let input_bytes = input.as_bytes();
754 while i < input_bytes.len() {
755 if parsing_name {
756 if char_escaped {
757 char_escaped = false;
758 } else if input_bytes[i] == b'\\' {
759 char_escaped = true;
760 } else if input_bytes[i] == b'\'' {
761 let name_bytes = &input_bytes[start_index..i];
763 let name = String::from_utf8(name_bytes.to_vec()).map_err(|_| {
764 TypesError::TypeParsingError(format!(
765 "Invalid UTF-8 sequence in input for the enum name: {}",
766 &input[start_index..i]
767 ))
768 })?;
769 names.push(name);
770
771 if i + 4 >= input_bytes.len() {
773 return Err(TypesError::TypeParsingError(format!(
774 "Invalid Enum format - expected ` = ` after name, input: {input}",
775 )));
776 }
777 i += 4;
778 start_index = i;
779 parsing_name = false;
780 }
781 }
782 else if input_bytes[i] < b'0' || input_bytes[i] > b'9' {
784 let index = parse_enum_index(&input_bytes[start_index..i], input)?;
785 indices.push(index);
786
787 if i + 2 >= input_bytes.len() {
790 break; }
792 i += 2;
793 start_index = i + 1;
794 parsing_name = true;
795 char_escaped = false;
796 }
797
798 i += 1;
799 }
800
801 let index = parse_enum_index(&input_bytes[start_index..i], input)?;
802 indices.push(index);
803
804 if names.len() != indices.len() {
805 return Err(TypesError::TypeParsingError(format!(
806 "Invalid Enum format - expected the same number of names and indices, got names: {}, indices: {}",
807 names.join(", "),
808 indices
809 .iter()
810 .map(|index| index.to_string())
811 .collect::<Vec<String>>()
812 .join(", "),
813 )));
814 }
815
816 Ok(indices
817 .into_iter()
818 .zip(names)
819 .collect::<HashMap<i16, String>>())
820}
821
822#[cfg(test)]
823mod tests {
824 use super::*;
825
826 #[test]
827 fn test_aggregate_function_display() {
828 let simple = DataTypeNode::AggregateFunction("sum".to_string(), vec![DataTypeNode::UInt64]);
829 assert_eq!(simple.to_string(), "AggregateFunction(sum, UInt64)");
830
831 let complex = DataTypeNode::AggregateFunction(
832 "groupArray".to_string(),
833 vec![
834 DataTypeNode::String,
835 DataTypeNode::UInt32,
836 DataTypeNode::Nullable(Box::new(DataTypeNode::Float64)),
837 ],
838 );
839 assert_eq!(
840 complex.to_string(),
841 "AggregateFunction(groupArray, String, UInt32, Nullable(Float64))"
842 );
843 }
844
845 #[test]
846 fn test_tuple_display() {
847 let empty = DataTypeNode::Tuple(vec![]);
848 assert_eq!(empty.to_string(), "Tuple()");
849
850 let single = DataTypeNode::Tuple(vec![DataTypeNode::String]);
851 assert_eq!(single.to_string(), "Tuple(String)");
852
853 let multiple = DataTypeNode::Tuple(vec![
854 DataTypeNode::UInt64,
855 DataTypeNode::String,
856 DataTypeNode::DateTime(None),
857 DataTypeNode::Array(Box::new(DataTypeNode::Int32)),
858 ]);
859 assert_eq!(
860 multiple.to_string(),
861 "Tuple(UInt64, String, DateTime, Array(Int32))"
862 );
863 }
864
865 #[test]
866 fn test_enum_display() {
867 let mut values1 = HashMap::new();
868 values1.insert(1, "one".to_string());
869 values1.insert(2, "two".to_string());
870 values1.insert(3, "three".to_string());
871
872 let simple_enum = DataTypeNode::Enum(EnumType::Enum8, values1);
873 assert_eq!(
874 simple_enum.to_string(),
875 "Enum8('one' = 1, 'two' = 2, 'three' = 3)"
876 );
877
878 let mut values2 = HashMap::new();
880 values2.insert(10, "ten".to_string());
881 values2.insert(1, "one".to_string());
882 values2.insert(5, "five".to_string());
883
884 let ordered_enum = DataTypeNode::Enum(EnumType::Enum16, values2);
885 assert_eq!(
886 ordered_enum.to_string(),
887 "Enum16('one' = 1, 'five' = 5, 'ten' = 10)"
888 );
889 }
890
891 #[test]
892 fn test_variant_display() {
893 let empty = DataTypeNode::Variant(vec![]);
895 assert_eq!(empty.to_string(), "Variant()");
896
897 let single = DataTypeNode::Variant(vec![DataTypeNode::String]);
899 assert_eq!(single.to_string(), "Variant(String)");
900
901 let multiple = DataTypeNode::Variant(vec![
903 DataTypeNode::UInt64,
904 DataTypeNode::String,
905 DataTypeNode::Nullable(Box::new(DataTypeNode::DateTime(None))),
906 DataTypeNode::Array(Box::new(DataTypeNode::Int32)),
907 ]);
908 assert_eq!(
909 multiple.to_string(),
910 "Variant(UInt64, String, Nullable(DateTime), Array(Int32))"
911 );
912
913 let nested = DataTypeNode::Variant(vec![
915 DataTypeNode::Tuple(vec![DataTypeNode::String, DataTypeNode::UInt64]),
916 DataTypeNode::Map([
917 Box::new(DataTypeNode::String),
918 Box::new(DataTypeNode::Int32),
919 ]),
920 ]);
921 assert_eq!(
922 nested.to_string(),
923 "Variant(Tuple(String, UInt64), Map(String, Int32))"
924 );
925 }
926
927 #[test]
928 fn test_data_type_new_simple() {
929 assert_eq!(DataTypeNode::new("UInt8").unwrap(), DataTypeNode::UInt8);
930 assert_eq!(DataTypeNode::new("UInt16").unwrap(), DataTypeNode::UInt16);
931 assert_eq!(DataTypeNode::new("UInt32").unwrap(), DataTypeNode::UInt32);
932 assert_eq!(DataTypeNode::new("UInt64").unwrap(), DataTypeNode::UInt64);
933 assert_eq!(DataTypeNode::new("UInt128").unwrap(), DataTypeNode::UInt128);
934 assert_eq!(DataTypeNode::new("UInt256").unwrap(), DataTypeNode::UInt256);
935 assert_eq!(DataTypeNode::new("Int8").unwrap(), DataTypeNode::Int8);
936 assert_eq!(DataTypeNode::new("Int16").unwrap(), DataTypeNode::Int16);
937 assert_eq!(DataTypeNode::new("Int32").unwrap(), DataTypeNode::Int32);
938 assert_eq!(DataTypeNode::new("Int64").unwrap(), DataTypeNode::Int64);
939 assert_eq!(DataTypeNode::new("Int128").unwrap(), DataTypeNode::Int128);
940 assert_eq!(DataTypeNode::new("Int256").unwrap(), DataTypeNode::Int256);
941 assert_eq!(DataTypeNode::new("Float32").unwrap(), DataTypeNode::Float32);
942 assert_eq!(DataTypeNode::new("Float64").unwrap(), DataTypeNode::Float64);
943 assert_eq!(
944 DataTypeNode::new("BFloat16").unwrap(),
945 DataTypeNode::BFloat16
946 );
947 assert_eq!(DataTypeNode::new("String").unwrap(), DataTypeNode::String);
948 assert_eq!(DataTypeNode::new("UUID").unwrap(), DataTypeNode::UUID);
949 assert_eq!(DataTypeNode::new("Date").unwrap(), DataTypeNode::Date);
950 assert_eq!(DataTypeNode::new("Date32").unwrap(), DataTypeNode::Date32);
951 assert_eq!(DataTypeNode::new("IPv4").unwrap(), DataTypeNode::IPv4);
952 assert_eq!(DataTypeNode::new("IPv6").unwrap(), DataTypeNode::IPv6);
953 assert_eq!(DataTypeNode::new("Bool").unwrap(), DataTypeNode::Bool);
954 assert_eq!(DataTypeNode::new("Dynamic").unwrap(), DataTypeNode::Dynamic);
955 assert_eq!(DataTypeNode::new("JSON").unwrap(), DataTypeNode::JSON);
956 assert!(DataTypeNode::new("SomeUnknownType").is_err());
957 }
958
959 #[test]
960 fn test_data_type_new_fixed_string() {
961 assert_eq!(
962 DataTypeNode::new("FixedString(1)").unwrap(),
963 DataTypeNode::FixedString(1)
964 );
965 assert_eq!(
966 DataTypeNode::new("FixedString(16)").unwrap(),
967 DataTypeNode::FixedString(16)
968 );
969 assert_eq!(
970 DataTypeNode::new("FixedString(255)").unwrap(),
971 DataTypeNode::FixedString(255)
972 );
973 assert_eq!(
974 DataTypeNode::new("FixedString(65535)").unwrap(),
975 DataTypeNode::FixedString(65_535)
976 );
977 assert!(DataTypeNode::new("FixedString()").is_err());
978 assert!(DataTypeNode::new("FixedString(0)").is_err());
979 assert!(DataTypeNode::new("FixedString(-1)").is_err());
980 assert!(DataTypeNode::new("FixedString(abc)").is_err());
981 }
982
983 #[test]
984 fn test_data_type_new_array() {
985 assert_eq!(
986 DataTypeNode::new("Array(UInt8)").unwrap(),
987 DataTypeNode::Array(Box::new(DataTypeNode::UInt8))
988 );
989 assert_eq!(
990 DataTypeNode::new("Array(String)").unwrap(),
991 DataTypeNode::Array(Box::new(DataTypeNode::String))
992 );
993 assert_eq!(
994 DataTypeNode::new("Array(FixedString(16))").unwrap(),
995 DataTypeNode::Array(Box::new(DataTypeNode::FixedString(16)))
996 );
997 assert_eq!(
998 DataTypeNode::new("Array(Nullable(Int32))").unwrap(),
999 DataTypeNode::Array(Box::new(DataTypeNode::Nullable(Box::new(
1000 DataTypeNode::Int32
1001 ))))
1002 );
1003 assert!(DataTypeNode::new("Array()").is_err());
1004 assert!(DataTypeNode::new("Array(abc)").is_err());
1005 }
1006
1007 #[test]
1008 fn test_data_type_new_decimal() {
1009 assert_eq!(
1010 DataTypeNode::new("Decimal(7, 2)").unwrap(),
1011 DataTypeNode::Decimal(7, 2, DecimalType::Decimal32)
1012 );
1013 assert_eq!(
1014 DataTypeNode::new("Decimal(12, 4)").unwrap(),
1015 DataTypeNode::Decimal(12, 4, DecimalType::Decimal64)
1016 );
1017 assert_eq!(
1018 DataTypeNode::new("Decimal(27, 6)").unwrap(),
1019 DataTypeNode::Decimal(27, 6, DecimalType::Decimal128)
1020 );
1021 assert_eq!(
1022 DataTypeNode::new("Decimal(42, 8)").unwrap(),
1023 DataTypeNode::Decimal(42, 8, DecimalType::Decimal256)
1024 );
1025 assert!(DataTypeNode::new("Decimal").is_err());
1026 assert!(DataTypeNode::new("Decimal(").is_err());
1027 assert!(DataTypeNode::new("Decimal()").is_err());
1028 assert!(DataTypeNode::new("Decimal(1)").is_err());
1029 assert!(DataTypeNode::new("Decimal(1,)").is_err());
1030 assert!(DataTypeNode::new("Decimal(1, )").is_err());
1031 assert!(DataTypeNode::new("Decimal(0, 0)").is_err()); assert!(DataTypeNode::new("Decimal(x, 0)").is_err()); assert!(DataTypeNode::new("Decimal(', ')").is_err());
1034 assert!(DataTypeNode::new("Decimal(77, 1)").is_err()); assert!(DataTypeNode::new("Decimal(1, 2)").is_err()); assert!(DataTypeNode::new("Decimal(1, x)").is_err()); assert!(DataTypeNode::new("Decimal(42, ,)").is_err());
1038 assert!(DataTypeNode::new("Decimal(42, ')").is_err());
1039 assert!(DataTypeNode::new("Decimal(foobar)").is_err());
1040 }
1041
1042 #[test]
1043 fn test_data_type_new_datetime() {
1044 assert_eq!(
1045 DataTypeNode::new("DateTime").unwrap(),
1046 DataTypeNode::DateTime(None)
1047 );
1048 assert_eq!(
1049 DataTypeNode::new("DateTime('UTC')").unwrap(),
1050 DataTypeNode::DateTime(Some("UTC".to_string()))
1051 );
1052 assert_eq!(
1053 DataTypeNode::new("DateTime('America/New_York')").unwrap(),
1054 DataTypeNode::DateTime(Some("America/New_York".to_string()))
1055 );
1056 assert!(DataTypeNode::new("DateTime()").is_err());
1057 }
1058
/// Parsing of `DateTime64(p)` and `DateTime64(p, 'tz')` for precisions 0 through 9.
#[test]
fn test_data_type_new_datetime64() {
    // Precision only: the timezone slot stays `None`.
    let precision_only = [
        ("DateTime64(0)", DateTimePrecision::Precision0),
        ("DateTime64(1)", DateTimePrecision::Precision1),
        ("DateTime64(2)", DateTimePrecision::Precision2),
        ("DateTime64(3)", DateTimePrecision::Precision3),
        ("DateTime64(4)", DateTimePrecision::Precision4),
        ("DateTime64(5)", DateTimePrecision::Precision5),
        ("DateTime64(6)", DateTimePrecision::Precision6),
        ("DateTime64(7)", DateTimePrecision::Precision7),
        ("DateTime64(8)", DateTimePrecision::Precision8),
        ("DateTime64(9)", DateTimePrecision::Precision9),
    ];
    for (type_name, precision) in precision_only {
        assert_eq!(
            DataTypeNode::new(type_name).unwrap(),
            DataTypeNode::DateTime64(precision, None)
        );
    }

    // Precision followed by a quoted timezone.
    let with_timezone = [
        (
            "DateTime64(0, 'UTC')",
            DateTimePrecision::Precision0,
            "UTC",
        ),
        (
            "DateTime64(3, 'America/New_York')",
            DateTimePrecision::Precision3,
            "America/New_York",
        ),
        (
            "DateTime64(6, 'America/New_York')",
            DateTimePrecision::Precision6,
            "America/New_York",
        ),
        (
            "DateTime64(9, 'Europe/Amsterdam')",
            DateTimePrecision::Precision9,
            "Europe/Amsterdam",
        ),
    ];
    for (type_name, precision, timezone) in with_timezone {
        assert_eq!(
            DataTypeNode::new(type_name).unwrap(),
            DataTypeNode::DateTime64(precision, Some(timezone.to_string()))
        );
    }

    // A missing or non-numeric precision is rejected.
    assert!(DataTypeNode::new("DateTime64()").is_err());
    assert!(DataTypeNode::new("DateTime64(x)").is_err());
}
1129
/// `Time` parses to the same node whether or not a parenthesized argument follows it.
#[test]
fn test_data_type_new_time() {
    // Unlike `DateTime`, the node keeps no timezone, and `Time()` is accepted too.
    let accepted = ["Time", "Time('UTC')", "Time('America/New_York')", "Time()"];
    for type_name in accepted {
        assert_eq!(DataTypeNode::new(type_name).unwrap(), DataTypeNode::Time);
    }
}
1143
/// Parsing of `Time64(p)`; a trailing timezone argument does not change the resulting node.
#[test]
fn test_data_type_new_time64() {
    let accepted = [
        ("Time64(0)", DateTimePrecision::Precision0),
        ("Time64(1)", DateTimePrecision::Precision1),
        ("Time64(2)", DateTimePrecision::Precision2),
        ("Time64(3)", DateTimePrecision::Precision3),
        ("Time64(4)", DateTimePrecision::Precision4),
        ("Time64(5)", DateTimePrecision::Precision5),
        ("Time64(6)", DateTimePrecision::Precision6),
        ("Time64(7)", DateTimePrecision::Precision7),
        ("Time64(8)", DateTimePrecision::Precision8),
        ("Time64(9)", DateTimePrecision::Precision9),
        // With a timezone argument only the precision ends up in the node.
        ("Time64(0, 'UTC')", DateTimePrecision::Precision0),
        ("Time64(3, 'America/New_York')", DateTimePrecision::Precision3),
        ("Time64(6, 'America/New_York')", DateTimePrecision::Precision6),
        ("Time64(9, 'Europe/Amsterdam')", DateTimePrecision::Precision9),
    ];
    for (type_name, precision) in accepted {
        assert_eq!(
            DataTypeNode::new(type_name).unwrap(),
            DataTypeNode::Time64(precision)
        );
    }

    // A missing or non-numeric precision is rejected.
    assert!(DataTypeNode::new("Time64()").is_err());
    assert!(DataTypeNode::new("Time64(x)").is_err());
}
1205
/// Every `Interval<Unit>` type name parses to the matching `IntervalType`.
#[test]
fn test_data_type_new_interval() {
    let cases = [
        ("IntervalNanosecond", IntervalType::Nanosecond),
        ("IntervalMicrosecond", IntervalType::Microsecond),
        ("IntervalMillisecond", IntervalType::Millisecond),
        ("IntervalSecond", IntervalType::Second),
        ("IntervalMinute", IntervalType::Minute),
        ("IntervalHour", IntervalType::Hour),
        ("IntervalDay", IntervalType::Day),
        ("IntervalWeek", IntervalType::Week),
        ("IntervalMonth", IntervalType::Month),
        ("IntervalQuarter", IntervalType::Quarter),
        ("IntervalYear", IntervalType::Year),
    ];
    for (type_name, unit) in cases {
        assert_eq!(
            DataTypeNode::new(type_name).unwrap(),
            DataTypeNode::Interval(unit)
        );
    }
}
1253
/// Parsing of `LowCardinality(T)`, including nested inner types and malformed input.
#[test]
fn test_data_type_new_low_cardinality() {
    // Builds the expected node around an already constructed inner type.
    let low_cardinality = |inner: DataTypeNode| DataTypeNode::LowCardinality(Box::new(inner));
    let rejects = |type_name: &str| DataTypeNode::new(type_name).is_err();

    assert_eq!(
        DataTypeNode::new("LowCardinality(UInt8)").unwrap(),
        low_cardinality(DataTypeNode::UInt8)
    );
    assert_eq!(
        DataTypeNode::new("LowCardinality(String)").unwrap(),
        low_cardinality(DataTypeNode::String)
    );

    // The inner type may itself be a wrapper type.
    let int_array = DataTypeNode::Array(Box::new(DataTypeNode::Int32));
    assert_eq!(
        DataTypeNode::new("LowCardinality(Array(Int32))").unwrap(),
        low_cardinality(int_array)
    );
    let nullable_int = DataTypeNode::Nullable(Box::new(DataTypeNode::Int32));
    assert_eq!(
        DataTypeNode::new("LowCardinality(Nullable(Int32))").unwrap(),
        low_cardinality(nullable_int)
    );

    // Missing, empty, or unknown inner types are errors.
    assert!(rejects("LowCardinality"));
    assert!(rejects("LowCardinality()"));
    assert!(rejects("LowCardinality(X)"));
}
1280
/// Parsing of `Nullable(T)` and rejection of malformed variants.
#[test]
fn test_data_type_new_nullable() {
    let nullable = |inner: DataTypeNode| DataTypeNode::Nullable(Box::new(inner));
    let rejects = |type_name: &str| DataTypeNode::new(type_name).is_err();

    assert_eq!(
        DataTypeNode::new("Nullable(UInt8)").unwrap(),
        nullable(DataTypeNode::UInt8)
    );
    assert_eq!(
        DataTypeNode::new("Nullable(String)").unwrap(),
        nullable(DataTypeNode::String)
    );

    // Missing, empty, or unknown inner types are errors.
    assert!(rejects("Nullable"));
    assert!(rejects("Nullable()"));
    assert!(rejects("Nullable(X)"));
}
1295
/// Parsing of `Map(K, V)`, including a nested map value and malformed input.
#[test]
fn test_data_type_new_map() {
    // Builds the expected node from a key type and a value type.
    let map_of = |key: DataTypeNode, value: DataTypeNode| {
        DataTypeNode::Map([Box::new(key), Box::new(value)])
    };
    let rejects = |type_name: &str| DataTypeNode::new(type_name).is_err();

    assert_eq!(
        DataTypeNode::new("Map(UInt8, String)").unwrap(),
        map_of(DataTypeNode::UInt8, DataTypeNode::String)
    );
    assert_eq!(
        DataTypeNode::new("Map(String, Int32)").unwrap(),
        map_of(DataTypeNode::String, DataTypeNode::Int32)
    );

    // The value type can itself be a map holding further nested types.
    let nullable_strings = DataTypeNode::Array(Box::new(DataTypeNode::Nullable(Box::new(
        DataTypeNode::String,
    ))));
    let inner_map = map_of(DataTypeNode::Int32, nullable_strings);
    assert_eq!(
        DataTypeNode::new("Map(String, Map(Int32, Array(Nullable(String))))").unwrap(),
        map_of(DataTypeNode::String, inner_map)
    );

    // Wrong arity, unknown key/value types, and an unclosed parenthesis are errors.
    assert!(rejects("Map()"));
    assert!(rejects("Map"));
    assert!(rejects("Map(K)"));
    assert!(rejects("Map(K, V)"));
    assert!(rejects("Map(Int32, V)"));
    assert!(rejects("Map(K, Int32)"));
    assert!(rejects("Map(String, Int32"));
}
1332
/// Parsing of `Variant(T1, T2, ...)`, including nested member types.
#[test]
fn test_data_type_new_variant() {
    let parsed = DataTypeNode::new("Variant(UInt8, String)").unwrap();
    let expected = DataTypeNode::Variant(vec![DataTypeNode::UInt8, DataTypeNode::String]);
    assert_eq!(parsed, expected);

    let parsed = DataTypeNode::new("Variant(String, Int32)").unwrap();
    let expected = DataTypeNode::Variant(vec![DataTypeNode::String, DataTypeNode::Int32]);
    assert_eq!(parsed, expected);

    // Members may be wrapper types with their own arguments.
    let nullable_strings = DataTypeNode::Array(Box::new(DataTypeNode::Nullable(Box::new(
        DataTypeNode::String,
    ))));
    let int_to_string = DataTypeNode::Map([
        Box::new(DataTypeNode::Int32),
        Box::new(DataTypeNode::String),
    ]);
    let parsed =
        DataTypeNode::new("Variant(Int32, Array(Nullable(String)), Map(Int32, String))").unwrap();
    let expected = DataTypeNode::Variant(vec![
        DataTypeNode::Int32,
        nullable_strings,
        int_to_string,
    ]);
    assert_eq!(parsed, expected);

    // The bare name without a member list is an error.
    assert!(DataTypeNode::new("Variant").is_err());
}
1359
/// Parsing of `Tuple(T1, T2, ...)`: flat, nested, enum members, and malformed input.
#[test]
fn test_data_type_new_tuple() {
    let rejects = |type_name: &str| DataTypeNode::new(type_name).is_err();

    let parsed = DataTypeNode::new("Tuple(UInt8, String)").unwrap();
    assert_eq!(
        parsed,
        DataTypeNode::Tuple(vec![DataTypeNode::UInt8, DataTypeNode::String])
    );

    let parsed = DataTypeNode::new("Tuple(String, Int32)").unwrap();
    assert_eq!(
        parsed,
        DataTypeNode::Tuple(vec![DataTypeNode::String, DataTypeNode::Int32])
    );

    // No whitespace after the separating comma.
    let parsed = DataTypeNode::new("Tuple(Bool,Int32)").unwrap();
    assert_eq!(
        parsed,
        DataTypeNode::Tuple(vec![DataTypeNode::Bool, DataTypeNode::Int32])
    );

    // Nested element types, including a tuple inside a map value.
    let nullable_strings = DataTypeNode::Array(Box::new(DataTypeNode::Nullable(Box::new(
        DataTypeNode::String,
    ))));
    let inner_tuple = DataTypeNode::Tuple(vec![
        DataTypeNode::String,
        DataTypeNode::Array(Box::new(DataTypeNode::UInt8)),
    ]);
    let int_to_tuple = DataTypeNode::Map([Box::new(DataTypeNode::Int32), Box::new(inner_tuple)]);
    let parsed = DataTypeNode::new(
        "Tuple(Int32, Array(Nullable(String)), Map(Int32, Tuple(String, Array(UInt8))))",
    )
    .unwrap();
    assert_eq!(
        parsed,
        DataTypeNode::Tuple(vec![DataTypeNode::Int32, nullable_strings, int_to_tuple])
    );

    // An enum element whose labels contain escaped quotes, `=` and digits.
    let with_enum = format!("Tuple(String, {ENUM_WITH_ESCAPING_STR})");
    assert_eq!(
        DataTypeNode::new(&with_enum).unwrap(),
        DataTypeNode::Tuple(vec![DataTypeNode::String, enum_with_escaping()])
    );

    // Missing element lists and unknown element types are errors.
    assert!(rejects("Tuple"));
    assert!(rejects("Tuple("));
    assert!(rejects("Tuple()"));
    assert!(rejects("Tuple(,)"));
    assert!(rejects("Tuple(X)"));
    assert!(rejects("Tuple(Int32, X)"));
    assert!(rejects("Tuple(Int32, String, X)"));
}
1405
/// Parsing of `Enum8(...)` / `Enum16(...)` value lists and rejection of malformed input.
#[test]
fn test_data_type_new_enum() {
    // Turns `(value, label)` pairs into the map stored inside an enum node.
    fn labels(pairs: &[(i16, &str)]) -> HashMap<i16, String> {
        pairs
            .iter()
            .map(|&(value, label)| (value, label.to_string()))
            .collect()
    }
    let rejects = |type_name: &str| DataTypeNode::new(type_name).is_err();

    // Single entries with negative values.
    assert_eq!(
        DataTypeNode::new("Enum8('A' = -42)").unwrap(),
        DataTypeNode::Enum(EnumType::Enum8, labels(&[(-42, "A")]))
    );
    assert_eq!(
        DataTypeNode::new("Enum16('A' = -144)").unwrap(),
        DataTypeNode::Enum(EnumType::Enum16, labels(&[(-144, "A")]))
    );

    // The same two entries under both enum widths.
    assert_eq!(
        DataTypeNode::new("Enum8('A' = 1, 'B' = 2)").unwrap(),
        DataTypeNode::Enum(EnumType::Enum8, labels(&[(1, "A"), (2, "B")]))
    );
    assert_eq!(
        DataTypeNode::new("Enum16('A' = 1, 'B' = 2)").unwrap(),
        DataTypeNode::Enum(EnumType::Enum16, labels(&[(1, "A"), (2, "B")]))
    );

    // Labels containing escaped quotes, `=` and digits.
    assert_eq!(
        DataTypeNode::new(ENUM_WITH_ESCAPING_STR).unwrap(),
        enum_with_escaping()
    );

    // An empty label is allowed.
    assert_eq!(
        DataTypeNode::new("Enum8('foo' = 0, '' = 42)").unwrap(),
        DataTypeNode::Enum(EnumType::Enum8, labels(&[(0, "foo"), (42, "")]))
    );

    // Enums without any values are errors.
    assert!(rejects("Enum()"));
    assert!(rejects("Enum8()"));
    assert!(rejects("Enum16()"));

    // NOTE(review): every input below uses the `Enum32` name, and the first one shows
    // that name is rejected even with a well-formed value list, so the malformed value
    // lists may never reach value parsing. Confirm whether `Enum8`/`Enum16` was intended.
    assert!(rejects("Enum32('A' = 1, 'B' = 2)"));
    assert!(rejects("Enum32('A','B')"));
    assert!(rejects("Enum32('A' = 1, 'B')"));
    assert!(rejects("Enum32('A' = 1, 'B' =)"));
    assert!(rejects("Enum32('A' = 1, 'B' = )"));
    assert!(rejects("Enum32('A'= 1,'B' =)"));
}
1452
/// Each geo type name parses to the variant of the same name.
#[test]
fn test_data_type_new_geo() {
    let cases = [
        ("Point", DataTypeNode::Point),
        ("Ring", DataTypeNode::Ring),
        ("LineString", DataTypeNode::LineString),
        ("Polygon", DataTypeNode::Polygon),
        ("MultiLineString", DataTypeNode::MultiLineString),
        ("MultiPolygon", DataTypeNode::MultiPolygon),
    ];
    for (type_name, expected) in cases {
        assert_eq!(DataTypeNode::new(type_name).unwrap(), expected);
    }
}
1471
/// Argument-less variants are formatted as their plain type name.
#[test]
fn test_data_type_to_string_simple() {
    let cases = [
        (DataTypeNode::UInt8, "UInt8"),
        (DataTypeNode::UInt16, "UInt16"),
        (DataTypeNode::UInt32, "UInt32"),
        (DataTypeNode::UInt64, "UInt64"),
        (DataTypeNode::UInt128, "UInt128"),
        (DataTypeNode::UInt256, "UInt256"),
        (DataTypeNode::Int8, "Int8"),
        (DataTypeNode::Int16, "Int16"),
        (DataTypeNode::Int32, "Int32"),
        (DataTypeNode::Int64, "Int64"),
        (DataTypeNode::Int128, "Int128"),
        (DataTypeNode::Int256, "Int256"),
        (DataTypeNode::Float32, "Float32"),
        (DataTypeNode::Float64, "Float64"),
        (DataTypeNode::BFloat16, "BFloat16"),
        (DataTypeNode::UUID, "UUID"),
        (DataTypeNode::Date, "Date"),
        (DataTypeNode::Date32, "Date32"),
        (DataTypeNode::IPv4, "IPv4"),
        (DataTypeNode::IPv6, "IPv6"),
        (DataTypeNode::Bool, "Bool"),
        (DataTypeNode::Dynamic, "Dynamic"),
        (DataTypeNode::JSON, "JSON"),
        (DataTypeNode::String, "String"),
    ];
    for (data_type, expected) in cases {
        assert_eq!(data_type.to_string(), expected);
    }
}
1500
/// Formatting of variants that carry arguments or nested types.
#[test]
fn test_data_types_to_string_complex() {
    let cases = [
        (DataTypeNode::DateTime(None), "DateTime"),
        (
            DataTypeNode::DateTime(Some("UTC".to_string())),
            "DateTime('UTC')",
        ),
        (
            DataTypeNode::DateTime(Some("America/New_York".to_string())),
            "DateTime('America/New_York')",
        ),
        (
            DataTypeNode::Nullable(Box::new(DataTypeNode::UInt64)),
            "Nullable(UInt64)",
        ),
        (
            DataTypeNode::LowCardinality(Box::new(DataTypeNode::String)),
            "LowCardinality(String)",
        ),
        (
            DataTypeNode::Array(Box::new(DataTypeNode::String)),
            "Array(String)",
        ),
        (
            DataTypeNode::Array(Box::new(DataTypeNode::Nullable(Box::new(
                DataTypeNode::String,
            )))),
            "Array(Nullable(String))",
        ),
        (
            DataTypeNode::Tuple(vec![
                DataTypeNode::String,
                DataTypeNode::UInt32,
                DataTypeNode::Float64,
            ]),
            "Tuple(String, UInt32, Float64)",
        ),
        (
            DataTypeNode::Map([
                Box::new(DataTypeNode::String),
                Box::new(DataTypeNode::UInt32),
            ]),
            "Map(String, UInt32)",
        ),
        (
            DataTypeNode::Decimal(10, 2, DecimalType::Decimal32),
            "Decimal(10, 2)",
        ),
        // Enum entries are expected in ascending order of their numeric value.
        (
            DataTypeNode::Enum(
                EnumType::Enum8,
                HashMap::from([(1, "A".to_string()), (2, "B".to_string())]),
            ),
            "Enum8('A' = 1, 'B' = 2)",
        ),
        (
            DataTypeNode::Enum(
                EnumType::Enum16,
                HashMap::from([(42, "foo".to_string()), (144, "bar".to_string())]),
            ),
            "Enum16('foo' = 42, 'bar' = 144)",
        ),
        (enum_with_escaping(), ENUM_WITH_ESCAPING_STR),
        (
            DataTypeNode::AggregateFunction("sum".to_string(), vec![DataTypeNode::UInt64]),
            "AggregateFunction(sum, UInt64)",
        ),
        (DataTypeNode::FixedString(16), "FixedString(16)"),
        (
            DataTypeNode::Variant(vec![DataTypeNode::UInt8, DataTypeNode::Bool]),
            "Variant(UInt8, Bool)",
        ),
    ];
    for (data_type, expected) in cases {
        assert_eq!(data_type.to_string(), expected);
    }
}
1581
/// Formatting of `DateTime64` for each precision, with and without a timezone.
#[test]
fn test_datetime64_to_string() {
    // Builds a `DateTime64` node from a precision and an optional timezone name.
    let datetime64 = |precision: DateTimePrecision, timezone: Option<&str>| {
        DataTypeNode::DateTime64(precision, timezone.map(|tz| tz.to_string()))
    };

    let test_cases = [
        (datetime64(DateTimePrecision::Precision0, None), "DateTime64(0)"),
        (datetime64(DateTimePrecision::Precision1, None), "DateTime64(1)"),
        (datetime64(DateTimePrecision::Precision2, None), "DateTime64(2)"),
        (datetime64(DateTimePrecision::Precision3, None), "DateTime64(3)"),
        (datetime64(DateTimePrecision::Precision4, None), "DateTime64(4)"),
        (datetime64(DateTimePrecision::Precision5, None), "DateTime64(5)"),
        (datetime64(DateTimePrecision::Precision6, None), "DateTime64(6)"),
        (datetime64(DateTimePrecision::Precision7, None), "DateTime64(7)"),
        (datetime64(DateTimePrecision::Precision8, None), "DateTime64(8)"),
        (datetime64(DateTimePrecision::Precision9, None), "DateTime64(9)"),
        (
            datetime64(DateTimePrecision::Precision0, Some("UTC")),
            "DateTime64(0, 'UTC')",
        ),
        (
            datetime64(DateTimePrecision::Precision3, Some("America/New_York")),
            "DateTime64(3, 'America/New_York')",
        ),
        (
            datetime64(DateTimePrecision::Precision6, Some("Europe/Amsterdam")),
            "DateTime64(6, 'Europe/Amsterdam')",
        ),
        (
            datetime64(DateTimePrecision::Precision9, Some("Asia/Tokyo")),
            "DateTime64(9, 'Asia/Tokyo')",
        ),
    ];

    for (data_type, expected_str) in test_cases {
        assert_eq!(
            data_type.to_string(),
            expected_str,
            "Expected data type {data_type} to be formatted as {expected_str}"
        );
    }
}
1659
/// Each interval unit is formatted as `Interval<Unit>`.
#[test]
fn test_interval_to_string() {
    let cases = [
        (IntervalType::Nanosecond, "IntervalNanosecond"),
        (IntervalType::Microsecond, "IntervalMicrosecond"),
        (IntervalType::Millisecond, "IntervalMillisecond"),
        (IntervalType::Second, "IntervalSecond"),
        (IntervalType::Minute, "IntervalMinute"),
        (IntervalType::Hour, "IntervalHour"),
        (IntervalType::Day, "IntervalDay"),
        (IntervalType::Week, "IntervalWeek"),
        (IntervalType::Month, "IntervalMonth"),
        (IntervalType::Quarter, "IntervalQuarter"),
        (IntervalType::Year, "IntervalYear"),
    ];
    for (unit, expected) in cases {
        assert_eq!(DataTypeNode::Interval(unit).to_string(), expected);
    }
}
1707
/// A parsed node converts into a `String` holding its type name.
#[test]
fn test_data_type_node_into_string() {
    let rendered: String = DataTypeNode::new("Array(Int32)").unwrap().into();
    assert_eq!(rendered, "Array(Int32)");
}
1714
/// Geo variants are formatted as their plain type name.
#[test]
fn test_data_type_to_string_geo() {
    let cases = [
        (DataTypeNode::Point, "Point"),
        (DataTypeNode::Ring, "Ring"),
        (DataTypeNode::LineString, "LineString"),
        (DataTypeNode::Polygon, "Polygon"),
        (DataTypeNode::MultiLineString, "MultiLineString"),
        (DataTypeNode::MultiPolygon, "MultiPolygon"),
    ];
    for (data_type, expected) in cases {
        assert_eq!(data_type.to_string(), expected);
    }
}
1724
/// A column is displayed as `<name>: <data type>`.
#[test]
fn test_display_column() {
    let data_type = DataTypeNode::new("Array(Int32)").unwrap();
    let column = Column::new(String::from("col"), data_type);
    let rendered = column.to_string();
    assert_eq!(rendered, "col: Array(Int32)");
}
1733
/// Each decimal size is displayed as its variant name.
#[test]
fn test_display_decimal_size() {
    let cases = [
        (DecimalType::Decimal32, "Decimal32"),
        (DecimalType::Decimal64, "Decimal64"),
        (DecimalType::Decimal128, "Decimal128"),
        (DecimalType::Decimal256, "Decimal256"),
    ];
    for (size, expected) in cases {
        assert_eq!(size.to_string(), expected);
    }
}
1741
/// Parsing of `Time` / `Time64` with and without timezone arguments, plus invalid `Time64`.
///
/// Only the parsing direction is asserted here.
#[test]
fn test_time_time64_roundtrip_and_edges() {
    use super::DateTimePrecision::*;

    // `Time` yields the same node with or without a timezone argument.
    for type_name in ["Time", "Time('UTC')", "Time('Europe/Moscow')"] {
        assert_eq!(DataTypeNode::new(type_name).unwrap(), DataTypeNode::Time);
    }

    // `Time64` keeps only the precision, even when a timezone is supplied.
    let time64_cases = [
        ("Time64(0)", Precision0),
        ("Time64(9, 'Europe/Amsterdam')", Precision9),
        ("Time64(0, 'UTC')", Precision0),
        ("Time64(3)", Precision3),
        ("Time64(6, 'America/New_York')", Precision6),
    ];
    for (type_name, precision) in time64_cases {
        assert_eq!(
            DataTypeNode::new(type_name).unwrap(),
            DataTypeNode::Time64(precision)
        );
    }

    // A missing or non-numeric precision is rejected.
    assert!(DataTypeNode::new("Time64()").is_err());
    assert!(DataTypeNode::new("Time64(x)").is_err());
}
1795
/// Enum type string whose labels contain backslash-escaped quotes, `=` signs and digits.
// A raw string keeps each backslash literal without doubling it.
const ENUM_WITH_ESCAPING_STR: &str =
    r"Enum8('f\'' = 1, 'x =' = 2, 'b\'\'' = 3, '\'c=4=' = 42, '4' = 100)";
1798
1799 fn enum_with_escaping() -> DataTypeNode {
1800 DataTypeNode::Enum(
1801 EnumType::Enum8,
1802 HashMap::from([
1803 (1, "f\\'".to_string()),
1804 (2, "x =".to_string()),
1805 (3, "b\\'\\'".to_string()),
1806 (42, "\\'c=4=".to_string()),
1807 (100, "4".to_string()),
1808 ]),
1809 )
1810 }
1811}