Skip to main content

clickhouse_types/
data_types.rs

1use crate::error::TypesError;
2use std::collections::HashMap;
3use std::fmt::{Display, Formatter};
4
5/// A definition of a column in the result set,
6/// taken out of the `RowBinaryWithNamesAndTypes` header.
7#[derive(Debug, Clone, PartialEq)]
8pub struct Column {
9    /// The name of the column.
10    pub name: String,
11    /// The data type of the column.
12    pub data_type: DataTypeNode,
13}
14
15impl Column {
16    #[allow(missing_docs)]
17    pub fn new(name: String, data_type: DataTypeNode) -> Self {
18        Self { name, data_type }
19    }
20}
21
22impl Display for Column {
23    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
24        write!(f, "{}: {}", self.name, self.data_type)
25    }
26}
27
28/// Represents a data type in ClickHouse.
29/// See <https://clickhouse.com/docs/sql-reference/data-types>
30#[derive(Debug, Clone, PartialEq)]
31#[non_exhaustive]
32#[allow(missing_docs)]
33pub enum DataTypeNode {
34    Bool,
35
36    UInt8,
37    UInt16,
38    UInt32,
39    UInt64,
40    UInt128,
41    UInt256,
42
43    Int8,
44    Int16,
45    Int32,
46    Int64,
47    Int128,
48    Int256,
49
50    Float32,
51    Float64,
52    BFloat16,
53
54    /// Scale, Precision, 32 | 64 | 128 | 256
55    Decimal(u8, u8, DecimalType),
56
57    String,
58    FixedString(usize),
59    UUID,
60
61    Date,
62    Date32,
63
64    /// Optional timezone
65    DateTime(Option<String>),
66    /// Precision and optional timezone
67    DateTime64(DateTimePrecision, Option<String>),
68
69    /// Time-of-day, no timezone (timezone is ignored in value operations)
70    Time,
71    /// Precision and optional timezone (timezone is ignored in value operations)
72    Time64(DateTimePrecision),
73
74    Interval(IntervalType),
75
76    IPv4,
77    IPv6,
78
79    Nullable(Box<DataTypeNode>),
80    LowCardinality(Box<DataTypeNode>),
81
82    Array(Box<DataTypeNode>),
83    Tuple(Vec<DataTypeNode>),
84    Enum(EnumType, HashMap<i16, String>),
85
86    /// Key-Value pairs are defined as an array, so it can be used as a slice
87    Map([Box<DataTypeNode>; 2]),
88
89    /// Function name and its arguments
90    AggregateFunction(String, Vec<DataTypeNode>),
91
92    /// Function name and the inner type.
93    /// The wire format is identical to the inner type; the function name is
94    /// metadata for the MergeTree engine, not the client protocol.
95    SimpleAggregateFunction(String, Box<DataTypeNode>),
96
97    /// Contains all possible types for this variant
98    Variant(Vec<DataTypeNode>),
99
100    Dynamic,
101    JSON,
102
103    // TODO: Rename for better representation
104    JsonWithHint(Vec<(String, Box<DataTypeNode>)>),
105    Point,
106    Ring,
107    LineString,
108    MultiLineString,
109    Polygon,
110    MultiPolygon,
111}
112
113impl DataTypeNode {
114    /// Parses a data type from a string that is received
115    /// in the `RowBinaryWithNamesAndTypes` and `Native` formats headers.
116    /// See also: <https://clickhouse.com/docs/interfaces/formats/RowBinaryWithNamesAndTypes#description>
117    pub fn new(name: &str) -> Result<Self, TypesError> {
118        match name {
119            "UInt8" => Ok(Self::UInt8),
120            "UInt16" => Ok(Self::UInt16),
121            "UInt32" => Ok(Self::UInt32),
122            "UInt64" => Ok(Self::UInt64),
123            "UInt128" => Ok(Self::UInt128),
124            "UInt256" => Ok(Self::UInt256),
125            "Int8" => Ok(Self::Int8),
126            "Int16" => Ok(Self::Int16),
127            "Int32" => Ok(Self::Int32),
128            "Int64" => Ok(Self::Int64),
129            "Int128" => Ok(Self::Int128),
130            "Int256" => Ok(Self::Int256),
131            "Float32" => Ok(Self::Float32),
132            "Float64" => Ok(Self::Float64),
133            "BFloat16" => Ok(Self::BFloat16),
134            "String" => Ok(Self::String),
135            "UUID" => Ok(Self::UUID),
136            "Date" => Ok(Self::Date),
137            "Date32" => Ok(Self::Date32),
138            "IPv4" => Ok(Self::IPv4),
139            "IPv6" => Ok(Self::IPv6),
140            "Bool" => Ok(Self::Bool),
141            "Dynamic" => Ok(Self::Dynamic),
142            "JSON" => Ok(Self::JSON),
143            "Point" => Ok(Self::Point),
144            "Ring" => Ok(Self::Ring),
145            "LineString" => Ok(Self::LineString),
146            "MultiLineString" => Ok(Self::MultiLineString),
147            "Polygon" => Ok(Self::Polygon),
148            "MultiPolygon" => Ok(Self::MultiPolygon),
149
150            str if str.starts_with("JSON(") => parse_json(str),
151
152            str if str.starts_with("Decimal") => parse_decimal(str),
153            str if str.starts_with("DateTime64") => parse_datetime64(str),
154            str if str.starts_with("DateTime") => parse_datetime(str),
155            str if str.starts_with("Time64") => parse_time64(str),
156            str if str.starts_with("Time") => Ok(Self::Time),
157            str if str.starts_with("Interval") => Ok(Self::Interval(str[8..].parse()?)),
158
159            str if str.starts_with("Nullable") => parse_nullable(str),
160            str if str.starts_with("LowCardinality") => parse_low_cardinality(str),
161            str if str.starts_with("FixedString") => parse_fixed_string(str),
162
163            str if str.starts_with("Array") => parse_array(str),
164            str if str.starts_with("Enum") => parse_enum(str),
165            str if str.starts_with("Map") => parse_map(str),
166            str if str.starts_with("Tuple") => parse_tuple(str),
167            str if str.starts_with("Variant") => parse_variant(str),
168
169            str if str.starts_with("SimpleAggregateFunction(") => {
170                parse_simple_aggregate_function(str)
171            }
172
173            // ...
174            str => Err(TypesError::TypeParsingError(format!(
175                "Unknown data type: {str}"
176            ))),
177        }
178    }
179
180    /// LowCardinality(T) -> T
181    pub fn remove_low_cardinality(&self) -> &DataTypeNode {
182        match self {
183            DataTypeNode::LowCardinality(inner) => inner,
184            _ => self,
185        }
186    }
187
188    /// SimpleAggregateFunction(fn, T) -> T
189    ///
190    /// The wire format of a `SimpleAggregateFunction` column is identical to
191    /// its inner type. This method strips the wrapper so that (de)serialization
192    /// validation can treat it as the inner type.
193    pub fn remove_simple_aggregate_function(&self) -> &DataTypeNode {
194        match self {
195            DataTypeNode::SimpleAggregateFunction(_, inner) => inner,
196            _ => self,
197        }
198    }
199}
200
201impl From<DataTypeNode> for String {
202    fn from(value: DataTypeNode) -> Self {
203        value.to_string()
204    }
205}
206
207impl Display for DataTypeNode {
208    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
209        use DataTypeNode::*;
210        match self {
211            UInt8 => write!(f, "UInt8"),
212            UInt16 => write!(f, "UInt16"),
213            UInt32 => write!(f, "UInt32"),
214            UInt64 => write!(f, "UInt64"),
215            UInt128 => write!(f, "UInt128"),
216            UInt256 => write!(f, "UInt256"),
217            Int8 => write!(f, "Int8"),
218            Int16 => write!(f, "Int16"),
219            Int32 => write!(f, "Int32"),
220            Int64 => write!(f, "Int64"),
221            Int128 => write!(f, "Int128"),
222            Int256 => write!(f, "Int256"),
223            Float32 => write!(f, "Float32"),
224            Float64 => write!(f, "Float64"),
225            BFloat16 => write!(f, "BFloat16"),
226            Decimal(precision, scale, _) => {
227                write!(f, "Decimal({precision}, {scale})")
228            }
229            String => write!(f, "String"),
230            UUID => write!(f, "UUID"),
231            Date => write!(f, "Date"),
232            Date32 => write!(f, "Date32"),
233            DateTime(None) => write!(f, "DateTime"),
234            DateTime(Some(tz)) => write!(f, "DateTime('{tz}')"),
235            DateTime64(precision, None) => write!(f, "DateTime64({precision})"),
236            DateTime64(precision, Some(tz)) => write!(f, "DateTime64({precision}, '{tz}')"),
237            Time => write!(f, "Time"),
238            Time64(precision) => write!(f, "Time64({precision})"),
239            Interval(interval) => write!(f, "Interval{interval}"),
240            IPv4 => write!(f, "IPv4"),
241            IPv6 => write!(f, "IPv6"),
242            Bool => write!(f, "Bool"),
243            Nullable(inner) => write!(f, "Nullable({inner})"),
244            Array(inner) => write!(f, "Array({inner})"),
245            Tuple(elements) => {
246                write!(f, "Tuple(")?;
247                for (i, element) in elements.iter().enumerate() {
248                    if i > 0 {
249                        write!(f, ", ")?;
250                    }
251                    write!(f, "{element}")?;
252                }
253                write!(f, ")")
254            }
255            Map([key, value]) => {
256                write!(f, "Map({key}, {value})")
257            }
258            LowCardinality(inner) => {
259                write!(f, "LowCardinality({inner})")
260            }
261            Enum(enum_type, values) => {
262                let mut values_vec = values.iter().collect::<Vec<_>>();
263                values_vec.sort_by(|(i1, _), (i2, _)| (*i1).cmp(*i2));
264                write!(f, "{enum_type}(")?;
265                for (i, (index, name)) in values_vec.iter().enumerate() {
266                    if i > 0 {
267                        write!(f, ", ")?;
268                    }
269                    write!(f, "'{name}' = {index}")?;
270                }
271                write!(f, ")")
272            }
273            AggregateFunction(func_name, args) => {
274                write!(f, "AggregateFunction({func_name}, ")?;
275                for (i, element) in args.iter().enumerate() {
276                    if i > 0 {
277                        write!(f, ", ")?;
278                    }
279                    write!(f, "{element}")?;
280                }
281                write!(f, ")")
282            }
283            SimpleAggregateFunction(func_name, inner) => {
284                write!(f, "SimpleAggregateFunction({func_name}, {inner})")
285            }
286            FixedString(size) => {
287                write!(f, "FixedString({size})")
288            }
289            Variant(types) => {
290                write!(f, "Variant(")?;
291                for (i, element) in types.iter().enumerate() {
292                    if i > 0 {
293                        write!(f, ", ")?;
294                    }
295                    write!(f, "{element}")?;
296                }
297                write!(f, ")")
298            }
299            JSON => write!(f, "JSON"),
300            Dynamic => write!(f, "Dynamic"),
301            Point => write!(f, "Point"),
302            Ring => write!(f, "Ring"),
303            LineString => write!(f, "LineString"),
304            MultiLineString => write!(f, "MultiLineString"),
305            Polygon => write!(f, "Polygon"),
306            MultiPolygon => write!(f, "MultiPolygon"),
307            JsonWithHint(json) => format_json_with_hint(json, f),
308        }
309    }
310}
311
312fn format_json_with_hint(
313    json: &[(String, Box<DataTypeNode>)],
314    f: &mut Formatter<'_>,
315) -> Result<(), std::fmt::Error> {
316    write!(f, "JSON(")?;
317
318    for (i, (name, ty)) in json.iter().enumerate() {
319        if i > 0 {
320            write!(f, ", ")?;
321        }
322        write!(f, "{} {}", name, ty)?;
323    }
324
325    write!(f, ")")
326}
327
/// The integer width backing an Enum type.
#[derive(Debug, Clone, PartialEq)]
pub enum EnumType {
    /// `Enum8`: values are stored as an `Int8`
    Enum8,
    /// `Enum16`: values are stored as an `Int16`
    Enum16,
}
336
337impl Display for EnumType {
338    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
339        match self {
340            EnumType::Enum8 => write!(f, "Enum8"),
341            EnumType::Enum16 => write!(f, "Enum16"),
342        }
343    }
344}
345
/// Sub-second precision of `DateTime64` (also carried by `Time64`).
/// Modelled as an enum because only the values from 0 to 9 are valid.
/// See also: <https://clickhouse.com/docs/sql-reference/data-types/datetime64>
#[derive(Debug, Clone, PartialEq)]
#[allow(missing_docs)]
pub enum DateTimePrecision {
    Precision0,
    Precision1,
    Precision2,
    Precision3,
    Precision4,
    Precision5,
    Precision6,
    Precision7,
    Precision8,
    Precision9,
}
363
364impl DateTimePrecision {
365    pub(crate) fn new(char: char) -> Result<DateTimePrecision, TypesError> {
366        match char {
367            '0' => Ok(DateTimePrecision::Precision0),
368            '1' => Ok(DateTimePrecision::Precision1),
369            '2' => Ok(DateTimePrecision::Precision2),
370            '3' => Ok(DateTimePrecision::Precision3),
371            '4' => Ok(DateTimePrecision::Precision4),
372            '5' => Ok(DateTimePrecision::Precision5),
373            '6' => Ok(DateTimePrecision::Precision6),
374            '7' => Ok(DateTimePrecision::Precision7),
375            '8' => Ok(DateTimePrecision::Precision8),
376            '9' => Ok(DateTimePrecision::Precision9),
377            _ => Err(TypesError::TypeParsingError(format!(
378                "Invalid DateTime64 precision, expected to be within [0, 9] interval, got {char}"
379            ))),
380        }
381    }
382}
383
/// The integer type backing a Decimal value.
/// See also: <https://clickhouse.com/docs/sql-reference/data-types/decimal>
#[derive(Debug, Clone, PartialEq)]
pub enum DecimalType {
    /// Backed by an `Int32`
    Decimal32,
    /// Backed by an `Int64`
    Decimal64,
    /// Backed by an `Int128`
    Decimal128,
    /// Backed by an `Int256`
    Decimal256,
}
397
398impl Display for DecimalType {
399    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
400        match self {
401            DecimalType::Decimal32 => write!(f, "Decimal32"),
402            DecimalType::Decimal64 => write!(f, "Decimal64"),
403            DecimalType::Decimal128 => write!(f, "Decimal128"),
404            DecimalType::Decimal256 => write!(f, "Decimal256"),
405        }
406    }
407}
408
409impl DecimalType {
410    pub(crate) fn new(precision: u8) -> Result<Self, TypesError> {
411        if precision <= 9 {
412            Ok(DecimalType::Decimal32)
413        } else if precision <= 18 {
414            Ok(DecimalType::Decimal64)
415        } else if precision <= 38 {
416            Ok(DecimalType::Decimal128)
417        } else if precision <= 76 {
418            Ok(DecimalType::Decimal256)
419        } else {
420            Err(TypesError::TypeParsingError(format!(
421                "Invalid Decimal precision: {precision}"
422            )))
423        }
424    }
425}
426
427impl Display for DateTimePrecision {
428    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
429        match self {
430            DateTimePrecision::Precision0 => write!(f, "0"),
431            DateTimePrecision::Precision1 => write!(f, "1"),
432            DateTimePrecision::Precision2 => write!(f, "2"),
433            DateTimePrecision::Precision3 => write!(f, "3"),
434            DateTimePrecision::Precision4 => write!(f, "4"),
435            DateTimePrecision::Precision5 => write!(f, "5"),
436            DateTimePrecision::Precision6 => write!(f, "6"),
437            DateTimePrecision::Precision7 => write!(f, "7"),
438            DateTimePrecision::Precision8 => write!(f, "8"),
439            DateTimePrecision::Precision9 => write!(f, "9"),
440        }
441    }
442}
443
/// The unit of an interval type (the part of the type name that follows `Interval`).
/// See also: <https://clickhouse.com/docs/sql-reference/data-types/special-data-types/interval>
#[derive(Debug, Clone, PartialEq)]
#[allow(missing_docs)]
pub enum IntervalType {
    Nanosecond,
    Microsecond,
    Millisecond,
    Second,
    Minute,
    Hour,
    Day,
    Week,
    Month,
    Quarter,
    Year,
}
461
462impl std::str::FromStr for IntervalType {
463    type Err = TypesError;
464
465    fn from_str(s: &str) -> Result<Self, Self::Err> {
466        match s {
467            "Nanosecond" => Ok(IntervalType::Nanosecond),
468            "Microsecond" => Ok(IntervalType::Microsecond),
469            "Millisecond" => Ok(IntervalType::Millisecond),
470            "Second" => Ok(IntervalType::Second),
471            "Minute" => Ok(IntervalType::Minute),
472            "Hour" => Ok(IntervalType::Hour),
473            "Day" => Ok(IntervalType::Day),
474            "Week" => Ok(IntervalType::Week),
475            "Month" => Ok(IntervalType::Month),
476            "Quarter" => Ok(IntervalType::Quarter),
477            "Year" => Ok(IntervalType::Year),
478            _ => Err(TypesError::TypeParsingError(format!(
479                "Unknown interval type: {s}"
480            ))),
481        }
482    }
483}
484
485impl Display for IntervalType {
486    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
487        match self {
488            Self::Nanosecond => write!(f, "Nanosecond"),
489            Self::Microsecond => write!(f, "Microsecond"),
490            Self::Millisecond => write!(f, "Millisecond"),
491            Self::Second => write!(f, "Second"),
492            Self::Minute => write!(f, "Minute"),
493            Self::Hour => write!(f, "Hour"),
494            Self::Day => write!(f, "Day"),
495            Self::Week => write!(f, "Week"),
496            Self::Month => write!(f, "Month"),
497            Self::Quarter => write!(f, "Quarter"),
498            Self::Year => write!(f, "Year"),
499        }
500    }
501}
502
503fn parse_fixed_string(input: &str) -> Result<DataTypeNode, TypesError> {
504    if input.len() >= 14 {
505        let size_str = &input[12..input.len() - 1];
506        let size = size_str.parse::<usize>().map_err(|err| {
507            TypesError::TypeParsingError(format!(
508                "Invalid FixedString size, expected a valid number. Underlying error: {err}, input: {input}, size_str: {size_str}"
509            ))
510        })?;
511        if size == 0 {
512            return Err(TypesError::TypeParsingError(format!(
513                "Invalid FixedString size, expected a positive number, got zero. Input: {input}"
514            )));
515        }
516        return Ok(DataTypeNode::FixedString(size));
517    }
518    Err(TypesError::TypeParsingError(format!(
519        "Invalid FixedString format, expected FixedString(N), got {input}"
520    )))
521}
522
523fn parse_array(input: &str) -> Result<DataTypeNode, TypesError> {
524    if input.len() >= 8 {
525        let inner_type_str = &input[6..input.len() - 1];
526        let inner_type = DataTypeNode::new(inner_type_str)?;
527        return Ok(DataTypeNode::Array(Box::new(inner_type)));
528    }
529    Err(TypesError::TypeParsingError(format!(
530        "Invalid Array format, expected Array(InnerType), got {input}"
531    )))
532}
533
534fn parse_enum(input: &str) -> Result<DataTypeNode, TypesError> {
535    if input.len() >= 9 {
536        let (enum_type, prefix_len) = if input.starts_with("Enum8") {
537            (EnumType::Enum8, 6)
538        } else if input.starts_with("Enum16") {
539            (EnumType::Enum16, 7)
540        } else {
541            return Err(TypesError::TypeParsingError(format!(
542                "Invalid Enum type, expected Enum8 or Enum16, got {input}"
543            )));
544        };
545        let enum_values_map_str = &input[prefix_len..input.len() - 1];
546        let enum_values_map = parse_enum_values_map(enum_values_map_str)?;
547        return Ok(DataTypeNode::Enum(enum_type, enum_values_map));
548    }
549    Err(TypesError::TypeParsingError(format!(
550        "Invalid Enum format, expected Enum8('name' = value), got {input}"
551    )))
552}
553
554fn parse_datetime(input: &str) -> Result<DataTypeNode, TypesError> {
555    if input == "DateTime" {
556        return Ok(DataTypeNode::DateTime(None));
557    }
558    if input.len() >= 12 {
559        let timezone = input[10..input.len() - 2].to_string();
560        return Ok(DataTypeNode::DateTime(Some(timezone)));
561    }
562    Err(TypesError::TypeParsingError(format!(
563        "Invalid DateTime format, expected DateTime('timezone'), got {input}"
564    )))
565}
566
567fn parse_decimal(input: &str) -> Result<DataTypeNode, TypesError> {
568    if input.len() >= 10 {
569        let precision_and_scale_str = input[8..input.len() - 1].split(", ").collect::<Vec<_>>();
570        if precision_and_scale_str.len() != 2 {
571            return Err(TypesError::TypeParsingError(format!(
572                "Invalid Decimal format, expected Decimal(P, S), got {input}"
573            )));
574        }
575        let parsed = precision_and_scale_str
576            .iter()
577            .map(|s| s.parse::<u8>())
578            .collect::<Result<Vec<_>, _>>()
579            .map_err(|err| {
580                TypesError::TypeParsingError(format!(
581                    "Invalid Decimal format, expected Decimal(P, S), got {input}. Underlying error: {err}"
582                ))
583            })?;
584        let precision = parsed[0];
585        let scale = parsed[1];
586        if scale < 1 || precision < 1 {
587            return Err(TypesError::TypeParsingError(format!(
588                "Invalid Decimal format, expected Decimal(P, S) with P > 0 and S > 0, got {input}"
589            )));
590        }
591        if precision < scale {
592            return Err(TypesError::TypeParsingError(format!(
593                "Invalid Decimal format, expected Decimal(P, S) with P >= S, got {input}"
594            )));
595        }
596        let size = DecimalType::new(parsed[0])?;
597        return Ok(DataTypeNode::Decimal(precision, scale, size));
598    }
599    Err(TypesError::TypeParsingError(format!(
600        "Invalid Decimal format, expected Decimal(P), got {input}"
601    )))
602}
603
604fn parse_datetime64(input: &str) -> Result<DataTypeNode, TypesError> {
605    if input.len() >= 13 {
606        let mut chars = input[11..input.len() - 1].chars();
607        let precision_char = chars.next().ok_or(TypesError::TypeParsingError(format!(
608            "Invalid DateTime64 precision, expected a positive number. Input: {input}"
609        )))?;
610        let precision = DateTimePrecision::new(precision_char)?;
611        let maybe_tz = match chars.as_str() {
612            str if str.len() > 2 => Some(str[3..str.len() - 1].to_string()),
613            _ => None,
614        };
615        return Ok(DataTypeNode::DateTime64(precision, maybe_tz));
616    }
617    Err(TypesError::TypeParsingError(format!(
618        "Invalid DateTime format, expected DateTime('timezone'), got {input}"
619    )))
620}
621
622fn parse_time64(input: &str) -> Result<DataTypeNode, TypesError> {
623    if input.len() >= 8 {
624        let mut chars = input[7..input.len() - 1].chars();
625        let precision_char = chars.next().ok_or(TypesError::TypeParsingError(format!(
626            "Invalid Time64 precision, expected a positive number. Input: {input}"
627        )))?;
628        let precision = DateTimePrecision::new(precision_char)?;
629
630        return Ok(DataTypeNode::Time64(precision));
631    }
632    Err(TypesError::TypeParsingError(format!(
633        "Invalid Time64 format, expected Time64(precision, 'timezone'), got {input}"
634    )))
635}
636
637fn parse_low_cardinality(input: &str) -> Result<DataTypeNode, TypesError> {
638    if input.len() >= 16 {
639        let inner_type_str = &input[15..input.len() - 1];
640        let inner_type = DataTypeNode::new(inner_type_str)?;
641        return Ok(DataTypeNode::LowCardinality(Box::new(inner_type)));
642    }
643    Err(TypesError::TypeParsingError(format!(
644        "Invalid LowCardinality format, expected LowCardinality(InnerType), got {input}"
645    )))
646}
647
648/// `SimpleAggregateFunction(func_name, InnerType)` is a transparent wrapper.
649/// The wire format is identical to `InnerType`; the function name is
650/// metadata for the MergeTree engine, not the client protocol.
651/// We preserve the full type so that it is correctly serialized back
652/// when sending column type headers during INSERT (RBWNAT format).
653fn parse_simple_aggregate_function(input: &str) -> Result<DataTypeNode, TypesError> {
654    let prefix = "SimpleAggregateFunction(";
655    let inner = &input[prefix.len()..input.len() - 1];
656    // Find the first top-level comma (not inside parentheses) to split
657    // the function name from the inner type.
658    let mut depth = 0u32;
659    let mut comma_pos = None;
660    for (i, b) in inner.bytes().enumerate() {
661        match b {
662            b'(' => depth += 1,
663            b')' => depth = depth.saturating_sub(1),
664            b',' if depth == 0 => {
665                comma_pos = Some(i);
666                break;
667            }
668            _ => {}
669        }
670    }
671    let comma_pos = comma_pos.ok_or_else(|| {
672        TypesError::TypeParsingError(format!("Invalid SimpleAggregateFunction: {input}"))
673    })?;
674    let func_name = inner[..comma_pos].trim().to_string();
675    let inner_type_str = inner[comma_pos + 1..].trim_start();
676    let inner_type = DataTypeNode::new(inner_type_str)?;
677    Ok(DataTypeNode::SimpleAggregateFunction(
678        func_name,
679        Box::new(inner_type),
680    ))
681}
682
683fn parse_nullable(input: &str) -> Result<DataTypeNode, TypesError> {
684    if input.len() >= 10 {
685        let inner_type_str = &input[9..input.len() - 1];
686        let inner_type = DataTypeNode::new(inner_type_str)?;
687        return Ok(DataTypeNode::Nullable(Box::new(inner_type)));
688    }
689    Err(TypesError::TypeParsingError(format!(
690        "Invalid Nullable format, expected Nullable(InnerType), got {input}"
691    )))
692}
693
694fn parse_map(input: &str) -> Result<DataTypeNode, TypesError> {
695    if input.len() >= 5 {
696        let inner_types_str = &input[4..input.len() - 1];
697        let inner_types = parse_inner_types(inner_types_str)?;
698        if inner_types.len() != 2 {
699            return Err(TypesError::TypeParsingError(format!(
700                "Expected two inner elements in a Map from input {input}"
701            )));
702        }
703        return Ok(DataTypeNode::Map([
704            Box::new(inner_types[0].clone()),
705            Box::new(inner_types[1].clone()),
706        ]));
707    }
708    Err(TypesError::TypeParsingError(format!(
709        "Invalid Map format, expected Map(KeyType, ValueType), got {input}"
710    )))
711}
712
713fn parse_json(input: &str) -> Result<DataTypeNode, TypesError> {
714    let columns = remove_json_header(input)?.split(',').collect::<Vec<_>>();
715
716    let inner_types = columns
717        .into_iter()
718        .map(|column| column.trim())
719        .filter(|column| !column.contains('=') && !column.starts_with("SKIP"))
720        .map(|column| {
721            let map = column.split(' ').collect::<Vec<_>>();
722            let key_type = map[0].to_string();
723            let value_type = DataTypeNode::new(map[1])?;
724
725            Ok((key_type, Box::new(value_type)))
726        })
727        .collect::<Result<Vec<(String, Box<DataTypeNode>)>, TypesError>>()?;
728
729    if inner_types.is_empty() {
730        return Ok(DataTypeNode::JSON);
731    }
732
733    Ok(DataTypeNode::JsonWithHint(inner_types))
734}
735
736fn remove_json_header(input: &str) -> Result<&str, TypesError> {
737    if input.starts_with("JSON") && input.ends_with(')') {
738        let new = input[5..].trim();
739
740        Ok(new.trim_end_matches(')'))
741    } else {
742        Err(TypesError::TypeParsingError(format!(
743            "Invalid JSON format, expected JSON(Type), got {input}"
744        )))
745    }
746}
747
748fn parse_tuple(input: &str) -> Result<DataTypeNode, TypesError> {
749    if input.len() > 7 {
750        let inner_types_str = &input[6..input.len() - 1];
751        let inner_types = parse_inner_types(inner_types_str)?;
752        if inner_types.is_empty() {
753            return Err(TypesError::TypeParsingError(format!(
754                "Expected at least one inner element in a Tuple from input {input}"
755            )));
756        }
757        return Ok(DataTypeNode::Tuple(inner_types));
758    }
759    Err(TypesError::TypeParsingError(format!(
760        "Invalid Tuple format, expected Tuple(Type1, Type2, ...), got {input}"
761    )))
762}
763
764fn parse_variant(input: &str) -> Result<DataTypeNode, TypesError> {
765    if input.len() >= 9 {
766        let inner_types_str = &input[8..input.len() - 1];
767        let inner_types = parse_inner_types(inner_types_str)?;
768        return Ok(DataTypeNode::Variant(inner_types));
769    }
770    Err(TypesError::TypeParsingError(format!(
771        "Invalid Variant format, expected Variant(Type1, Type2, ...), got {input}"
772    )))
773}
774
775/// Considers the element type parsed once we reach a comma outside of parens AND after an unescaped tick.
776/// The most complicated cases are values names in the self-defined Enum types:
777/// ```
778///  let input1 = "Tuple(Enum8('f\'()' = 1))";  // the result is  `f\'()`
779///  let input2 = "Tuple(Enum8('(' = 1))";       // the result is  `(`
780/// ```
781fn parse_inner_types(input: &str) -> Result<Vec<DataTypeNode>, TypesError> {
782    let mut inner_types: Vec<DataTypeNode> = Vec::new();
783
784    let input_bytes = input.as_bytes();
785
786    let mut open_parens = 0;
787    let mut quote_open = false;
788    let mut char_escaped = false;
789    let mut last_element_index = 0;
790
791    let mut i = 0;
792    while i < input_bytes.len() {
793        if char_escaped {
794            char_escaped = false;
795        } else if input_bytes[i] == b'\\' {
796            char_escaped = true;
797        } else if input_bytes[i] == b'\'' {
798            quote_open = !quote_open; // unescaped quote
799        } else if !quote_open {
800            if input_bytes[i] == b'(' {
801                open_parens += 1;
802            } else if input_bytes[i] == b')' {
803                open_parens -= 1;
804            } else if input_bytes[i] == b',' && open_parens == 0 {
805                let data_type_str = String::from_utf8(input_bytes[last_element_index..i].to_vec())
806                    .map_err(|_| {
807                        TypesError::TypeParsingError(format!(
808                            "Invalid UTF-8 sequence in input for the inner data type: {}",
809                            &input[last_element_index..]
810                        ))
811                    })?;
812                let data_type = DataTypeNode::new(&data_type_str)?;
813                inner_types.push(data_type);
814                // Skip ', ' (comma and space)
815                if i + 2 <= input_bytes.len() && input_bytes[i + 1] == b' ' {
816                    i += 2;
817                } else {
818                    i += 1;
819                }
820                last_element_index = i;
821                continue; // Skip the normal increment at the end of the loop
822            }
823        }
824        i += 1;
825    }
826
827    // Push the remaining part of the type if it seems to be valid (at least all parentheses are closed)
828    if open_parens == 0 && last_element_index < input_bytes.len() {
829        let data_type_str =
830            String::from_utf8(input_bytes[last_element_index..].to_vec()).map_err(|_| {
831                TypesError::TypeParsingError(format!(
832                    "Invalid UTF-8 sequence in input for the inner data type: {}",
833                    &input[last_element_index..]
834                ))
835            })?;
836        let data_type = DataTypeNode::new(&data_type_str)?;
837        inner_types.push(data_type);
838    }
839
840    Ok(inner_types)
841}
842
843#[inline]
844fn parse_enum_index(input_bytes: &[u8], input: &str) -> Result<i16, TypesError> {
845    String::from_utf8(input_bytes.to_vec())
846        .map_err(|_| {
847            TypesError::TypeParsingError(format!(
848                "Invalid UTF-8 sequence in input for the enum index: {}",
849                &input
850            ))
851        })?
852        .parse::<i16>()
853        .map_err(|_| {
854            TypesError::TypeParsingError(format!(
855                "Invalid Enum index, expected a valid number. Input: {input}"
856            ))
857        })
858}
859
860fn parse_enum_values_map(input: &str) -> Result<HashMap<i16, String>, TypesError> {
861    let mut names: Vec<String> = Vec::new();
862    let mut indices: Vec<i16> = Vec::new();
863    let mut parsing_name = true; // false when parsing the index
864    let mut char_escaped = false; // we should ignore escaped ticks
865    let mut start_index = 1; // Skip the first '
866
867    let mut i = 1;
868    let input_bytes = input.as_bytes();
869    while i < input_bytes.len() {
870        if parsing_name {
871            if char_escaped {
872                char_escaped = false;
873            } else if input_bytes[i] == b'\\' {
874                char_escaped = true;
875            } else if input_bytes[i] == b'\'' {
876                // non-escaped closing tick - push the name
877                let name_bytes = &input_bytes[start_index..i];
878                let name = String::from_utf8(name_bytes.to_vec()).map_err(|_| {
879                    TypesError::TypeParsingError(format!(
880                        "Invalid UTF-8 sequence in input for the enum name: {}",
881                        &input[start_index..i]
882                    ))
883                })?;
884                names.push(name);
885
886                // Skip ` = ` and the first digit, as it will always have at least one
887                if i + 4 >= input_bytes.len() {
888                    return Err(TypesError::TypeParsingError(format!(
889                        "Invalid Enum format - expected ` = ` after name, input: {input}",
890                    )));
891                }
892                i += 4;
893                start_index = i;
894                parsing_name = false;
895            }
896        }
897        // Parsing the index, skipping next iterations until the first non-digit one
898        else if input_bytes[i] < b'0' || input_bytes[i] > b'9' {
899            let index = parse_enum_index(&input_bytes[start_index..i], input)?;
900            indices.push(index);
901
902            // the char at this index should be comma
903            // Skip `, '`, but not the first char - ClickHouse allows something like Enum8('foo' = 0, '' = 42)
904            if i + 2 >= input_bytes.len() {
905                break; // At the end of the enum, no more entries
906            }
907            i += 2;
908            start_index = i + 1;
909            parsing_name = true;
910            char_escaped = false;
911        }
912
913        i += 1;
914    }
915
916    let index = parse_enum_index(&input_bytes[start_index..i], input)?;
917    indices.push(index);
918
919    if names.len() != indices.len() {
920        return Err(TypesError::TypeParsingError(format!(
921            "Invalid Enum format - expected the same number of names and indices, got names: {}, indices: {}",
922            names.join(", "),
923            indices
924                .iter()
925                .map(|index| index.to_string())
926                .collect::<Vec<String>>()
927                .join(", "),
928        )));
929    }
930
931    Ok(indices
932        .into_iter()
933        .zip(names)
934        .collect::<HashMap<i16, String>>())
935}
936
937#[cfg(test)]
938mod tests {
939    use super::*;
940
941    #[test]
942    fn test_aggregate_function_display() {
943        let simple = DataTypeNode::AggregateFunction("sum".to_string(), vec![DataTypeNode::UInt64]);
944        assert_eq!(simple.to_string(), "AggregateFunction(sum, UInt64)");
945
946        let complex = DataTypeNode::AggregateFunction(
947            "groupArray".to_string(),
948            vec![
949                DataTypeNode::String,
950                DataTypeNode::UInt32,
951                DataTypeNode::Nullable(Box::new(DataTypeNode::Float64)),
952            ],
953        );
954        assert_eq!(
955            complex.to_string(),
956            "AggregateFunction(groupArray, String, UInt32, Nullable(Float64))"
957        );
958    }
959
960    #[test]
961    fn test_tuple_display() {
962        let empty = DataTypeNode::Tuple(vec![]);
963        assert_eq!(empty.to_string(), "Tuple()");
964
965        let single = DataTypeNode::Tuple(vec![DataTypeNode::String]);
966        assert_eq!(single.to_string(), "Tuple(String)");
967
968        let multiple = DataTypeNode::Tuple(vec![
969            DataTypeNode::UInt64,
970            DataTypeNode::String,
971            DataTypeNode::DateTime(None),
972            DataTypeNode::Array(Box::new(DataTypeNode::Int32)),
973        ]);
974        assert_eq!(
975            multiple.to_string(),
976            "Tuple(UInt64, String, DateTime, Array(Int32))"
977        );
978    }
979
980    #[test]
981    fn test_json_with_hint_display() {
982        let json_with_hint = DataTypeNode::JsonWithHint(vec![
983            ("foo".to_string(), Box::new(DataTypeNode::String)),
984            ("bar".to_string(), Box::new(DataTypeNode::Int32)),
985        ]);
986        assert_eq!(
987            json_with_hint.to_string(),
988            "JSON(foo String, bar Int32)".to_string()
989        );
990    }
991
992    #[test]
993    fn test_enum_display() {
994        let mut values1 = HashMap::new();
995        values1.insert(1, "one".to_string());
996        values1.insert(2, "two".to_string());
997        values1.insert(3, "three".to_string());
998
999        let simple_enum = DataTypeNode::Enum(EnumType::Enum8, values1);
1000        assert_eq!(
1001            simple_enum.to_string(),
1002            "Enum8('one' = 1, 'two' = 2, 'three' = 3)"
1003        );
1004
1005        // Enum with unordered values (should sort by index)
1006        let mut values2 = HashMap::new();
1007        values2.insert(10, "ten".to_string());
1008        values2.insert(1, "one".to_string());
1009        values2.insert(5, "five".to_string());
1010
1011        let ordered_enum = DataTypeNode::Enum(EnumType::Enum16, values2);
1012        assert_eq!(
1013            ordered_enum.to_string(),
1014            "Enum16('one' = 1, 'five' = 5, 'ten' = 10)"
1015        );
1016    }
1017
1018    #[test]
1019    fn test_variant_display() {
1020        // Empty variant
1021        let empty = DataTypeNode::Variant(vec![]);
1022        assert_eq!(empty.to_string(), "Variant()");
1023
1024        // Single type variant
1025        let single = DataTypeNode::Variant(vec![DataTypeNode::String]);
1026        assert_eq!(single.to_string(), "Variant(String)");
1027
1028        // Multiple types variant
1029        let multiple = DataTypeNode::Variant(vec![
1030            DataTypeNode::UInt64,
1031            DataTypeNode::String,
1032            DataTypeNode::Nullable(Box::new(DataTypeNode::DateTime(None))),
1033            DataTypeNode::Array(Box::new(DataTypeNode::Int32)),
1034        ]);
1035        assert_eq!(
1036            multiple.to_string(),
1037            "Variant(UInt64, String, Nullable(DateTime), Array(Int32))"
1038        );
1039
1040        // Nested variant
1041        let nested = DataTypeNode::Variant(vec![
1042            DataTypeNode::Tuple(vec![DataTypeNode::String, DataTypeNode::UInt64]),
1043            DataTypeNode::Map([
1044                Box::new(DataTypeNode::String),
1045                Box::new(DataTypeNode::Int32),
1046            ]),
1047        ]);
1048        assert_eq!(
1049            nested.to_string(),
1050            "Variant(Tuple(String, UInt64), Map(String, Int32))"
1051        );
1052    }
1053
1054    #[test]
1055    fn test_data_type_new_simple() {
1056        assert_eq!(DataTypeNode::new("UInt8").unwrap(), DataTypeNode::UInt8);
1057        assert_eq!(DataTypeNode::new("UInt16").unwrap(), DataTypeNode::UInt16);
1058        assert_eq!(DataTypeNode::new("UInt32").unwrap(), DataTypeNode::UInt32);
1059        assert_eq!(DataTypeNode::new("UInt64").unwrap(), DataTypeNode::UInt64);
1060        assert_eq!(DataTypeNode::new("UInt128").unwrap(), DataTypeNode::UInt128);
1061        assert_eq!(DataTypeNode::new("UInt256").unwrap(), DataTypeNode::UInt256);
1062        assert_eq!(DataTypeNode::new("Int8").unwrap(), DataTypeNode::Int8);
1063        assert_eq!(DataTypeNode::new("Int16").unwrap(), DataTypeNode::Int16);
1064        assert_eq!(DataTypeNode::new("Int32").unwrap(), DataTypeNode::Int32);
1065        assert_eq!(DataTypeNode::new("Int64").unwrap(), DataTypeNode::Int64);
1066        assert_eq!(DataTypeNode::new("Int128").unwrap(), DataTypeNode::Int128);
1067        assert_eq!(DataTypeNode::new("Int256").unwrap(), DataTypeNode::Int256);
1068        assert_eq!(DataTypeNode::new("Float32").unwrap(), DataTypeNode::Float32);
1069        assert_eq!(DataTypeNode::new("Float64").unwrap(), DataTypeNode::Float64);
1070        assert_eq!(
1071            DataTypeNode::new("BFloat16").unwrap(),
1072            DataTypeNode::BFloat16
1073        );
1074        assert_eq!(DataTypeNode::new("String").unwrap(), DataTypeNode::String);
1075        assert_eq!(DataTypeNode::new("UUID").unwrap(), DataTypeNode::UUID);
1076        assert_eq!(DataTypeNode::new("Date").unwrap(), DataTypeNode::Date);
1077        assert_eq!(DataTypeNode::new("Date32").unwrap(), DataTypeNode::Date32);
1078        assert_eq!(DataTypeNode::new("IPv4").unwrap(), DataTypeNode::IPv4);
1079        assert_eq!(DataTypeNode::new("IPv6").unwrap(), DataTypeNode::IPv6);
1080        assert_eq!(DataTypeNode::new("Bool").unwrap(), DataTypeNode::Bool);
1081        assert_eq!(DataTypeNode::new("Dynamic").unwrap(), DataTypeNode::Dynamic);
1082        assert_eq!(DataTypeNode::new("JSON").unwrap(), DataTypeNode::JSON);
1083        assert_eq!(
1084            DataTypeNode::new(
1085                "JSON(max_dynamic_types=8, max_dynamic_paths=64, SKIP internal_metrics)"
1086            )
1087            .unwrap(),
1088            DataTypeNode::JSON
1089        );
1090        assert_eq!(
1091            DataTypeNode::new(
1092                "JSON(max_dynamic_types=8, max_dynamic_paths=64, SKIP internal_metrics, foo String, bar Int32)"
1093            )
1094            .unwrap(),
1095            DataTypeNode::JsonWithHint(vec![
1096                ("foo".to_string(), Box::new(DataTypeNode::String)),
1097                ("bar".to_string(), Box::new(DataTypeNode::Int32))
1098            ])
1099        );
1100        assert!(DataTypeNode::new("SomeUnknownType").is_err());
1101    }
1102
1103    #[test]
1104    fn test_data_type_new_fixed_string() {
1105        assert_eq!(
1106            DataTypeNode::new("FixedString(1)").unwrap(),
1107            DataTypeNode::FixedString(1)
1108        );
1109        assert_eq!(
1110            DataTypeNode::new("FixedString(16)").unwrap(),
1111            DataTypeNode::FixedString(16)
1112        );
1113        assert_eq!(
1114            DataTypeNode::new("FixedString(255)").unwrap(),
1115            DataTypeNode::FixedString(255)
1116        );
1117        assert_eq!(
1118            DataTypeNode::new("FixedString(65535)").unwrap(),
1119            DataTypeNode::FixedString(65_535)
1120        );
1121        assert!(DataTypeNode::new("FixedString()").is_err());
1122        assert!(DataTypeNode::new("FixedString(0)").is_err());
1123        assert!(DataTypeNode::new("FixedString(-1)").is_err());
1124        assert!(DataTypeNode::new("FixedString(abc)").is_err());
1125    }
1126
1127    #[test]
1128    fn test_data_type_new_array() {
1129        assert_eq!(
1130            DataTypeNode::new("Array(UInt8)").unwrap(),
1131            DataTypeNode::Array(Box::new(DataTypeNode::UInt8))
1132        );
1133        assert_eq!(
1134            DataTypeNode::new("Array(String)").unwrap(),
1135            DataTypeNode::Array(Box::new(DataTypeNode::String))
1136        );
1137        assert_eq!(
1138            DataTypeNode::new("Array(FixedString(16))").unwrap(),
1139            DataTypeNode::Array(Box::new(DataTypeNode::FixedString(16)))
1140        );
1141        assert_eq!(
1142            DataTypeNode::new("Array(Nullable(Int32))").unwrap(),
1143            DataTypeNode::Array(Box::new(DataTypeNode::Nullable(Box::new(
1144                DataTypeNode::Int32
1145            ))))
1146        );
1147        assert!(DataTypeNode::new("Array()").is_err());
1148        assert!(DataTypeNode::new("Array(abc)").is_err());
1149    }
1150
1151    #[test]
1152    fn test_data_type_new_decimal() {
1153        assert_eq!(
1154            DataTypeNode::new("Decimal(7, 2)").unwrap(),
1155            DataTypeNode::Decimal(7, 2, DecimalType::Decimal32)
1156        );
1157        assert_eq!(
1158            DataTypeNode::new("Decimal(12, 4)").unwrap(),
1159            DataTypeNode::Decimal(12, 4, DecimalType::Decimal64)
1160        );
1161        assert_eq!(
1162            DataTypeNode::new("Decimal(27, 6)").unwrap(),
1163            DataTypeNode::Decimal(27, 6, DecimalType::Decimal128)
1164        );
1165        assert_eq!(
1166            DataTypeNode::new("Decimal(42, 8)").unwrap(),
1167            DataTypeNode::Decimal(42, 8, DecimalType::Decimal256)
1168        );
1169        assert!(DataTypeNode::new("Decimal").is_err());
1170        assert!(DataTypeNode::new("Decimal(").is_err());
1171        assert!(DataTypeNode::new("Decimal()").is_err());
1172        assert!(DataTypeNode::new("Decimal(1)").is_err());
1173        assert!(DataTypeNode::new("Decimal(1,)").is_err());
1174        assert!(DataTypeNode::new("Decimal(1, )").is_err());
1175        assert!(DataTypeNode::new("Decimal(0, 0)").is_err()); // Precision must be > 0
1176        assert!(DataTypeNode::new("Decimal(x, 0)").is_err()); // Non-numeric precision
1177        assert!(DataTypeNode::new("Decimal(', ')").is_err());
1178        assert!(DataTypeNode::new("Decimal(77, 1)").is_err()); // Max precision is 76
1179        assert!(DataTypeNode::new("Decimal(1, 2)").is_err()); // Scale must be less than precision
1180        assert!(DataTypeNode::new("Decimal(1, x)").is_err()); // Non-numeric scale
1181        assert!(DataTypeNode::new("Decimal(42, ,)").is_err());
1182        assert!(DataTypeNode::new("Decimal(42, ')").is_err());
1183        assert!(DataTypeNode::new("Decimal(foobar)").is_err());
1184    }
1185
1186    #[test]
1187    fn test_data_type_new_datetime() {
1188        assert_eq!(
1189            DataTypeNode::new("DateTime").unwrap(),
1190            DataTypeNode::DateTime(None)
1191        );
1192        assert_eq!(
1193            DataTypeNode::new("DateTime('UTC')").unwrap(),
1194            DataTypeNode::DateTime(Some("UTC".to_string()))
1195        );
1196        assert_eq!(
1197            DataTypeNode::new("DateTime('America/New_York')").unwrap(),
1198            DataTypeNode::DateTime(Some("America/New_York".to_string()))
1199        );
1200        assert!(DataTypeNode::new("DateTime()").is_err());
1201    }
1202
1203    #[test]
1204    fn test_data_type_new_datetime64() {
1205        assert_eq!(
1206            DataTypeNode::new("DateTime64(0)").unwrap(),
1207            DataTypeNode::DateTime64(DateTimePrecision::Precision0, None)
1208        );
1209        assert_eq!(
1210            DataTypeNode::new("DateTime64(1)").unwrap(),
1211            DataTypeNode::DateTime64(DateTimePrecision::Precision1, None)
1212        );
1213        assert_eq!(
1214            DataTypeNode::new("DateTime64(2)").unwrap(),
1215            DataTypeNode::DateTime64(DateTimePrecision::Precision2, None)
1216        );
1217        assert_eq!(
1218            DataTypeNode::new("DateTime64(3)").unwrap(),
1219            DataTypeNode::DateTime64(DateTimePrecision::Precision3, None)
1220        );
1221        assert_eq!(
1222            DataTypeNode::new("DateTime64(4)").unwrap(),
1223            DataTypeNode::DateTime64(DateTimePrecision::Precision4, None)
1224        );
1225        assert_eq!(
1226            DataTypeNode::new("DateTime64(5)").unwrap(),
1227            DataTypeNode::DateTime64(DateTimePrecision::Precision5, None)
1228        );
1229        assert_eq!(
1230            DataTypeNode::new("DateTime64(6)").unwrap(),
1231            DataTypeNode::DateTime64(DateTimePrecision::Precision6, None)
1232        );
1233        assert_eq!(
1234            DataTypeNode::new("DateTime64(7)").unwrap(),
1235            DataTypeNode::DateTime64(DateTimePrecision::Precision7, None)
1236        );
1237        assert_eq!(
1238            DataTypeNode::new("DateTime64(8)").unwrap(),
1239            DataTypeNode::DateTime64(DateTimePrecision::Precision8, None)
1240        );
1241        assert_eq!(
1242            DataTypeNode::new("DateTime64(9)").unwrap(),
1243            DataTypeNode::DateTime64(DateTimePrecision::Precision9, None)
1244        );
1245        assert_eq!(
1246            DataTypeNode::new("DateTime64(0, 'UTC')").unwrap(),
1247            DataTypeNode::DateTime64(DateTimePrecision::Precision0, Some("UTC".to_string()))
1248        );
1249        assert_eq!(
1250            DataTypeNode::new("DateTime64(3, 'America/New_York')").unwrap(),
1251            DataTypeNode::DateTime64(
1252                DateTimePrecision::Precision3,
1253                Some("America/New_York".to_string())
1254            )
1255        );
1256        assert_eq!(
1257            DataTypeNode::new("DateTime64(6, 'America/New_York')").unwrap(),
1258            DataTypeNode::DateTime64(
1259                DateTimePrecision::Precision6,
1260                Some("America/New_York".to_string())
1261            )
1262        );
1263        assert_eq!(
1264            DataTypeNode::new("DateTime64(9, 'Europe/Amsterdam')").unwrap(),
1265            DataTypeNode::DateTime64(
1266                DateTimePrecision::Precision9,
1267                Some("Europe/Amsterdam".to_string())
1268            )
1269        );
1270        assert!(DataTypeNode::new("DateTime64()").is_err());
1271        assert!(DataTypeNode::new("DateTime64(x)").is_err());
1272    }
1273
1274    #[test]
1275    fn test_data_type_new_time() {
1276        assert_eq!(DataTypeNode::new("Time").unwrap(), DataTypeNode::Time);
1277        assert_eq!(
1278            DataTypeNode::new("Time('UTC')").unwrap(),
1279            DataTypeNode::Time
1280        );
1281        assert_eq!(
1282            DataTypeNode::new("Time('America/New_York')").unwrap(),
1283            DataTypeNode::Time
1284        );
1285        assert_eq!(DataTypeNode::new("Time()").unwrap(), DataTypeNode::Time);
1286    }
1287
1288    #[test]
1289    fn test_data_type_new_time64() {
1290        assert_eq!(
1291            DataTypeNode::new("Time64(0)").unwrap(),
1292            DataTypeNode::Time64(DateTimePrecision::Precision0)
1293        );
1294        assert_eq!(
1295            DataTypeNode::new("Time64(1)").unwrap(),
1296            DataTypeNode::Time64(DateTimePrecision::Precision1)
1297        );
1298        assert_eq!(
1299            DataTypeNode::new("Time64(2)").unwrap(),
1300            DataTypeNode::Time64(DateTimePrecision::Precision2)
1301        );
1302        assert_eq!(
1303            DataTypeNode::new("Time64(3)").unwrap(),
1304            DataTypeNode::Time64(DateTimePrecision::Precision3)
1305        );
1306        assert_eq!(
1307            DataTypeNode::new("Time64(4)").unwrap(),
1308            DataTypeNode::Time64(DateTimePrecision::Precision4)
1309        );
1310        assert_eq!(
1311            DataTypeNode::new("Time64(5)").unwrap(),
1312            DataTypeNode::Time64(DateTimePrecision::Precision5)
1313        );
1314        assert_eq!(
1315            DataTypeNode::new("Time64(6)").unwrap(),
1316            DataTypeNode::Time64(DateTimePrecision::Precision6)
1317        );
1318        assert_eq!(
1319            DataTypeNode::new("Time64(7)").unwrap(),
1320            DataTypeNode::Time64(DateTimePrecision::Precision7)
1321        );
1322        assert_eq!(
1323            DataTypeNode::new("Time64(8)").unwrap(),
1324            DataTypeNode::Time64(DateTimePrecision::Precision8)
1325        );
1326        assert_eq!(
1327            DataTypeNode::new("Time64(9)").unwrap(),
1328            DataTypeNode::Time64(DateTimePrecision::Precision9)
1329        );
1330        assert_eq!(
1331            DataTypeNode::new("Time64(0, 'UTC')").unwrap(),
1332            DataTypeNode::Time64(DateTimePrecision::Precision0)
1333        );
1334        assert_eq!(
1335            DataTypeNode::new("Time64(3, 'America/New_York')").unwrap(),
1336            DataTypeNode::Time64(DateTimePrecision::Precision3)
1337        );
1338        assert_eq!(
1339            DataTypeNode::new("Time64(6, 'America/New_York')").unwrap(),
1340            DataTypeNode::Time64(DateTimePrecision::Precision6)
1341        );
1342        assert_eq!(
1343            DataTypeNode::new("Time64(9, 'Europe/Amsterdam')").unwrap(),
1344            DataTypeNode::Time64(DateTimePrecision::Precision9)
1345        );
1346        assert!(DataTypeNode::new("Time64()").is_err());
1347        assert!(DataTypeNode::new("Time64(x)").is_err());
1348    }
1349
1350    #[test]
1351    fn test_data_type_new_interval() {
1352        assert_eq!(
1353            DataTypeNode::new("IntervalNanosecond").unwrap(),
1354            DataTypeNode::Interval(IntervalType::Nanosecond)
1355        );
1356        assert_eq!(
1357            DataTypeNode::new("IntervalMicrosecond").unwrap(),
1358            DataTypeNode::Interval(IntervalType::Microsecond)
1359        );
1360        assert_eq!(
1361            DataTypeNode::new("IntervalMillisecond").unwrap(),
1362            DataTypeNode::Interval(IntervalType::Millisecond)
1363        );
1364        assert_eq!(
1365            DataTypeNode::new("IntervalSecond").unwrap(),
1366            DataTypeNode::Interval(IntervalType::Second)
1367        );
1368        assert_eq!(
1369            DataTypeNode::new("IntervalMinute").unwrap(),
1370            DataTypeNode::Interval(IntervalType::Minute)
1371        );
1372        assert_eq!(
1373            DataTypeNode::new("IntervalHour").unwrap(),
1374            DataTypeNode::Interval(IntervalType::Hour)
1375        );
1376        assert_eq!(
1377            DataTypeNode::new("IntervalDay").unwrap(),
1378            DataTypeNode::Interval(IntervalType::Day)
1379        );
1380        assert_eq!(
1381            DataTypeNode::new("IntervalWeek").unwrap(),
1382            DataTypeNode::Interval(IntervalType::Week)
1383        );
1384        assert_eq!(
1385            DataTypeNode::new("IntervalMonth").unwrap(),
1386            DataTypeNode::Interval(IntervalType::Month)
1387        );
1388        assert_eq!(
1389            DataTypeNode::new("IntervalQuarter").unwrap(),
1390            DataTypeNode::Interval(IntervalType::Quarter)
1391        );
1392        assert_eq!(
1393            DataTypeNode::new("IntervalYear").unwrap(),
1394            DataTypeNode::Interval(IntervalType::Year)
1395        );
1396    }
1397
1398    #[test]
1399    fn test_data_type_new_low_cardinality() {
1400        assert_eq!(
1401            DataTypeNode::new("LowCardinality(UInt8)").unwrap(),
1402            DataTypeNode::LowCardinality(Box::new(DataTypeNode::UInt8))
1403        );
1404        assert_eq!(
1405            DataTypeNode::new("LowCardinality(String)").unwrap(),
1406            DataTypeNode::LowCardinality(Box::new(DataTypeNode::String))
1407        );
1408        assert_eq!(
1409            DataTypeNode::new("LowCardinality(Array(Int32))").unwrap(),
1410            DataTypeNode::LowCardinality(Box::new(DataTypeNode::Array(Box::new(
1411                DataTypeNode::Int32
1412            ))))
1413        );
1414        assert_eq!(
1415            DataTypeNode::new("LowCardinality(Nullable(Int32))").unwrap(),
1416            DataTypeNode::LowCardinality(Box::new(DataTypeNode::Nullable(Box::new(
1417                DataTypeNode::Int32
1418            ))))
1419        );
1420        assert!(DataTypeNode::new("LowCardinality").is_err());
1421        assert!(DataTypeNode::new("LowCardinality()").is_err());
1422        assert!(DataTypeNode::new("LowCardinality(X)").is_err());
1423    }
1424
1425    #[test]
1426    fn test_data_type_new_nullable() {
1427        assert_eq!(
1428            DataTypeNode::new("Nullable(UInt8)").unwrap(),
1429            DataTypeNode::Nullable(Box::new(DataTypeNode::UInt8))
1430        );
1431        assert_eq!(
1432            DataTypeNode::new("Nullable(String)").unwrap(),
1433            DataTypeNode::Nullable(Box::new(DataTypeNode::String))
1434        );
1435        assert!(DataTypeNode::new("Nullable").is_err());
1436        assert!(DataTypeNode::new("Nullable()").is_err());
1437        assert!(DataTypeNode::new("Nullable(X)").is_err());
1438    }
1439
1440    #[test]
1441    fn test_data_type_new_map() {
1442        assert_eq!(
1443            DataTypeNode::new("Map(UInt8, String)").unwrap(),
1444            DataTypeNode::Map([
1445                Box::new(DataTypeNode::UInt8),
1446                Box::new(DataTypeNode::String)
1447            ])
1448        );
1449        assert_eq!(
1450            DataTypeNode::new("Map(String, Int32)").unwrap(),
1451            DataTypeNode::Map([
1452                Box::new(DataTypeNode::String),
1453                Box::new(DataTypeNode::Int32)
1454            ])
1455        );
1456        assert_eq!(
1457            DataTypeNode::new("Map(String, Map(Int32, Array(Nullable(String))))").unwrap(),
1458            DataTypeNode::Map([
1459                Box::new(DataTypeNode::String),
1460                Box::new(DataTypeNode::Map([
1461                    Box::new(DataTypeNode::Int32),
1462                    Box::new(DataTypeNode::Array(Box::new(DataTypeNode::Nullable(
1463                        Box::new(DataTypeNode::String)
1464                    ))))
1465                ]))
1466            ])
1467        );
1468        assert!(DataTypeNode::new("Map()").is_err());
1469        assert!(DataTypeNode::new("Map").is_err());
1470        assert!(DataTypeNode::new("Map(K)").is_err());
1471        assert!(DataTypeNode::new("Map(K, V)").is_err());
1472        assert!(DataTypeNode::new("Map(Int32, V)").is_err());
1473        assert!(DataTypeNode::new("Map(K, Int32)").is_err());
1474        assert!(DataTypeNode::new("Map(String, Int32").is_err());
1475    }
1476
1477    #[test]
1478    fn test_data_type_new_variant() {
1479        assert_eq!(
1480            DataTypeNode::new("Variant(UInt8, String)").unwrap(),
1481            DataTypeNode::Variant(vec![DataTypeNode::UInt8, DataTypeNode::String])
1482        );
1483        assert_eq!(
1484            DataTypeNode::new("Variant(String, Int32)").unwrap(),
1485            DataTypeNode::Variant(vec![DataTypeNode::String, DataTypeNode::Int32])
1486        );
1487        assert_eq!(
1488            DataTypeNode::new("Variant(Int32, Array(Nullable(String)), Map(Int32, String))")
1489                .unwrap(),
1490            DataTypeNode::Variant(vec![
1491                DataTypeNode::Int32,
1492                DataTypeNode::Array(Box::new(DataTypeNode::Nullable(Box::new(
1493                    DataTypeNode::String
1494                )))),
1495                DataTypeNode::Map([
1496                    Box::new(DataTypeNode::Int32),
1497                    Box::new(DataTypeNode::String)
1498                ])
1499            ])
1500        );
1501        assert!(DataTypeNode::new("Variant").is_err());
1502    }
1503
1504    #[test]
1505    fn test_data_type_new_tuple() {
1506        assert_eq!(
1507            DataTypeNode::new("Tuple(UInt8, String)").unwrap(),
1508            DataTypeNode::Tuple(vec![DataTypeNode::UInt8, DataTypeNode::String])
1509        );
1510        assert_eq!(
1511            DataTypeNode::new("Tuple(String, Int32)").unwrap(),
1512            DataTypeNode::Tuple(vec![DataTypeNode::String, DataTypeNode::Int32])
1513        );
1514        assert_eq!(
1515            DataTypeNode::new("Tuple(Bool,Int32)").unwrap(),
1516            DataTypeNode::Tuple(vec![DataTypeNode::Bool, DataTypeNode::Int32])
1517        );
1518        assert_eq!(
1519            DataTypeNode::new(
1520                "Tuple(Int32, Array(Nullable(String)), Map(Int32, Tuple(String, Array(UInt8))))"
1521            )
1522            .unwrap(),
1523            DataTypeNode::Tuple(vec![
1524                DataTypeNode::Int32,
1525                DataTypeNode::Array(Box::new(DataTypeNode::Nullable(Box::new(
1526                    DataTypeNode::String
1527                )))),
1528                DataTypeNode::Map([
1529                    Box::new(DataTypeNode::Int32),
1530                    Box::new(DataTypeNode::Tuple(vec![
1531                        DataTypeNode::String,
1532                        DataTypeNode::Array(Box::new(DataTypeNode::UInt8))
1533                    ]))
1534                ])
1535            ])
1536        );
1537        assert_eq!(
1538            DataTypeNode::new(&format!("Tuple(String, {ENUM_WITH_ESCAPING_STR})")).unwrap(),
1539            DataTypeNode::Tuple(vec![DataTypeNode::String, enum_with_escaping()])
1540        );
1541        assert!(DataTypeNode::new("Tuple").is_err());
1542        assert!(DataTypeNode::new("Tuple(").is_err());
1543        assert!(DataTypeNode::new("Tuple()").is_err());
1544        assert!(DataTypeNode::new("Tuple(,)").is_err());
1545        assert!(DataTypeNode::new("Tuple(X)").is_err());
1546        assert!(DataTypeNode::new("Tuple(Int32, X)").is_err());
1547        assert!(DataTypeNode::new("Tuple(Int32, String, X)").is_err());
1548    }
1549
1550    #[test]
1551    fn test_data_type_new_enum() {
1552        assert_eq!(
1553            DataTypeNode::new("Enum8('A' = -42)").unwrap(),
1554            DataTypeNode::Enum(EnumType::Enum8, HashMap::from([(-42, "A".to_string())]))
1555        );
1556        assert_eq!(
1557            DataTypeNode::new("Enum16('A' = -144)").unwrap(),
1558            DataTypeNode::Enum(EnumType::Enum16, HashMap::from([(-144, "A".to_string())]))
1559        );
1560        assert_eq!(
1561            DataTypeNode::new("Enum8('A' = 1, 'B' = 2)").unwrap(),
1562            DataTypeNode::Enum(
1563                EnumType::Enum8,
1564                HashMap::from([(1, "A".to_string()), (2, "B".to_string())])
1565            )
1566        );
1567        assert_eq!(
1568            DataTypeNode::new("Enum16('A' = 1, 'B' = 2)").unwrap(),
1569            DataTypeNode::Enum(
1570                EnumType::Enum16,
1571                HashMap::from([(1, "A".to_string()), (2, "B".to_string())])
1572            )
1573        );
1574        assert_eq!(
1575            DataTypeNode::new(ENUM_WITH_ESCAPING_STR).unwrap(),
1576            enum_with_escaping()
1577        );
1578        assert_eq!(
1579            DataTypeNode::new("Enum8('foo' = 0, '' = 42)").unwrap(),
1580            DataTypeNode::Enum(
1581                EnumType::Enum8,
1582                HashMap::from([(0, "foo".to_string()), (42, "".to_string())])
1583            )
1584        );
1585
1586        assert!(DataTypeNode::new("Enum()").is_err());
1587        assert!(DataTypeNode::new("Enum8()").is_err());
1588        assert!(DataTypeNode::new("Enum16()").is_err());
1589        assert!(DataTypeNode::new("Enum32('A' = 1, 'B' = 2)").is_err());
1590        assert!(DataTypeNode::new("Enum32('A','B')").is_err());
1591        assert!(DataTypeNode::new("Enum32('A' = 1, 'B')").is_err());
1592        assert!(DataTypeNode::new("Enum32('A' = 1, 'B' =)").is_err());
1593        assert!(DataTypeNode::new("Enum32('A' = 1, 'B' = )").is_err());
1594        assert!(DataTypeNode::new("Enum32('A'= 1,'B' =)").is_err());
1595    }
1596
1597    #[test]
1598    fn test_data_type_new_geo() {
1599        assert_eq!(DataTypeNode::new("Point").unwrap(), DataTypeNode::Point);
1600        assert_eq!(DataTypeNode::new("Ring").unwrap(), DataTypeNode::Ring);
1601        assert_eq!(
1602            DataTypeNode::new("LineString").unwrap(),
1603            DataTypeNode::LineString
1604        );
1605        assert_eq!(DataTypeNode::new("Polygon").unwrap(), DataTypeNode::Polygon);
1606        assert_eq!(
1607            DataTypeNode::new("MultiLineString").unwrap(),
1608            DataTypeNode::MultiLineString
1609        );
1610        assert_eq!(
1611            DataTypeNode::new("MultiPolygon").unwrap(),
1612            DataTypeNode::MultiPolygon
1613        );
1614    }
1615
1616    #[test]
1617    fn test_data_type_to_string_simple() {
1618        // Simple types
1619        assert_eq!(DataTypeNode::UInt8.to_string(), "UInt8");
1620        assert_eq!(DataTypeNode::UInt16.to_string(), "UInt16");
1621        assert_eq!(DataTypeNode::UInt32.to_string(), "UInt32");
1622        assert_eq!(DataTypeNode::UInt64.to_string(), "UInt64");
1623        assert_eq!(DataTypeNode::UInt128.to_string(), "UInt128");
1624        assert_eq!(DataTypeNode::UInt256.to_string(), "UInt256");
1625        assert_eq!(DataTypeNode::Int8.to_string(), "Int8");
1626        assert_eq!(DataTypeNode::Int16.to_string(), "Int16");
1627        assert_eq!(DataTypeNode::Int32.to_string(), "Int32");
1628        assert_eq!(DataTypeNode::Int64.to_string(), "Int64");
1629        assert_eq!(DataTypeNode::Int128.to_string(), "Int128");
1630        assert_eq!(DataTypeNode::Int256.to_string(), "Int256");
1631        assert_eq!(DataTypeNode::Float32.to_string(), "Float32");
1632        assert_eq!(DataTypeNode::Float64.to_string(), "Float64");
1633        assert_eq!(DataTypeNode::BFloat16.to_string(), "BFloat16");
1634        assert_eq!(DataTypeNode::UUID.to_string(), "UUID");
1635        assert_eq!(DataTypeNode::Date.to_string(), "Date");
1636        assert_eq!(DataTypeNode::Date32.to_string(), "Date32");
1637        assert_eq!(DataTypeNode::IPv4.to_string(), "IPv4");
1638        assert_eq!(DataTypeNode::IPv6.to_string(), "IPv6");
1639        assert_eq!(DataTypeNode::Bool.to_string(), "Bool");
1640        assert_eq!(DataTypeNode::Dynamic.to_string(), "Dynamic");
1641        assert_eq!(DataTypeNode::JSON.to_string(), "JSON");
1642        assert_eq!(DataTypeNode::String.to_string(), "String");
1643    }
1644
1645    #[test]
1646    fn test_data_types_to_string_complex() {
1647        assert_eq!(DataTypeNode::DateTime(None).to_string(), "DateTime");
1648        assert_eq!(
1649            DataTypeNode::DateTime(Some("UTC".to_string())).to_string(),
1650            "DateTime('UTC')"
1651        );
1652        assert_eq!(
1653            DataTypeNode::DateTime(Some("America/New_York".to_string())).to_string(),
1654            "DateTime('America/New_York')"
1655        );
1656
1657        assert_eq!(
1658            DataTypeNode::Nullable(Box::new(DataTypeNode::UInt64)).to_string(),
1659            "Nullable(UInt64)"
1660        );
1661        assert_eq!(
1662            DataTypeNode::LowCardinality(Box::new(DataTypeNode::String)).to_string(),
1663            "LowCardinality(String)"
1664        );
1665        assert_eq!(
1666            DataTypeNode::Array(Box::new(DataTypeNode::String)).to_string(),
1667            "Array(String)"
1668        );
1669        assert_eq!(
1670            DataTypeNode::Array(Box::new(DataTypeNode::Nullable(Box::new(
1671                DataTypeNode::String
1672            ))))
1673            .to_string(),
1674            "Array(Nullable(String))"
1675        );
1676        assert_eq!(
1677            DataTypeNode::Tuple(vec![
1678                DataTypeNode::String,
1679                DataTypeNode::UInt32,
1680                DataTypeNode::Float64
1681            ])
1682            .to_string(),
1683            "Tuple(String, UInt32, Float64)"
1684        );
1685        assert_eq!(
1686            DataTypeNode::Map([
1687                Box::new(DataTypeNode::String),
1688                Box::new(DataTypeNode::UInt32)
1689            ])
1690            .to_string(),
1691            "Map(String, UInt32)"
1692        );
1693        assert_eq!(
1694            DataTypeNode::Decimal(10, 2, DecimalType::Decimal32).to_string(),
1695            "Decimal(10, 2)"
1696        );
1697        assert_eq!(
1698            DataTypeNode::Enum(
1699                EnumType::Enum8,
1700                HashMap::from([(1, "A".to_string()), (2, "B".to_string())]),
1701            )
1702            .to_string(),
1703            "Enum8('A' = 1, 'B' = 2)"
1704        );
1705        assert_eq!(
1706            DataTypeNode::Enum(
1707                EnumType::Enum16,
1708                HashMap::from([(42, "foo".to_string()), (144, "bar".to_string())]),
1709            )
1710            .to_string(),
1711            "Enum16('foo' = 42, 'bar' = 144)"
1712        );
1713        assert_eq!(enum_with_escaping().to_string(), ENUM_WITH_ESCAPING_STR);
1714        assert_eq!(
1715            DataTypeNode::AggregateFunction("sum".to_string(), vec![DataTypeNode::UInt64])
1716                .to_string(),
1717            "AggregateFunction(sum, UInt64)"
1718        );
1719        assert_eq!(DataTypeNode::FixedString(16).to_string(), "FixedString(16)");
1720        assert_eq!(
1721            DataTypeNode::Variant(vec![DataTypeNode::UInt8, DataTypeNode::Bool]).to_string(),
1722            "Variant(UInt8, Bool)"
1723        );
1724    }
1725
1726    #[test]
1727    fn test_datetime64_to_string() {
1728        let test_cases = [
1729            (
1730                DataTypeNode::DateTime64(DateTimePrecision::Precision0, None),
1731                "DateTime64(0)",
1732            ),
1733            (
1734                DataTypeNode::DateTime64(DateTimePrecision::Precision1, None),
1735                "DateTime64(1)",
1736            ),
1737            (
1738                DataTypeNode::DateTime64(DateTimePrecision::Precision2, None),
1739                "DateTime64(2)",
1740            ),
1741            (
1742                DataTypeNode::DateTime64(DateTimePrecision::Precision3, None),
1743                "DateTime64(3)",
1744            ),
1745            (
1746                DataTypeNode::DateTime64(DateTimePrecision::Precision4, None),
1747                "DateTime64(4)",
1748            ),
1749            (
1750                DataTypeNode::DateTime64(DateTimePrecision::Precision5, None),
1751                "DateTime64(5)",
1752            ),
1753            (
1754                DataTypeNode::DateTime64(DateTimePrecision::Precision6, None),
1755                "DateTime64(6)",
1756            ),
1757            (
1758                DataTypeNode::DateTime64(DateTimePrecision::Precision7, None),
1759                "DateTime64(7)",
1760            ),
1761            (
1762                DataTypeNode::DateTime64(DateTimePrecision::Precision8, None),
1763                "DateTime64(8)",
1764            ),
1765            (
1766                DataTypeNode::DateTime64(DateTimePrecision::Precision9, None),
1767                "DateTime64(9)",
1768            ),
1769            (
1770                DataTypeNode::DateTime64(DateTimePrecision::Precision0, Some("UTC".to_string())),
1771                "DateTime64(0, 'UTC')",
1772            ),
1773            (
1774                DataTypeNode::DateTime64(
1775                    DateTimePrecision::Precision3,
1776                    Some("America/New_York".to_string()),
1777                ),
1778                "DateTime64(3, 'America/New_York')",
1779            ),
1780            (
1781                DataTypeNode::DateTime64(
1782                    DateTimePrecision::Precision6,
1783                    Some("Europe/Amsterdam".to_string()),
1784                ),
1785                "DateTime64(6, 'Europe/Amsterdam')",
1786            ),
1787            (
1788                DataTypeNode::DateTime64(
1789                    DateTimePrecision::Precision9,
1790                    Some("Asia/Tokyo".to_string()),
1791                ),
1792                "DateTime64(9, 'Asia/Tokyo')",
1793            ),
1794        ];
1795        for (data_type, expected_str) in test_cases.iter() {
1796            assert_eq!(
1797                &data_type.to_string(),
1798                expected_str,
1799                "Expected data type {data_type} to be formatted as {expected_str}"
1800            );
1801        }
1802    }
1803
1804    #[test]
1805    fn test_interval_to_string() {
1806        assert_eq!(
1807            DataTypeNode::Interval(IntervalType::Nanosecond).to_string(),
1808            "IntervalNanosecond"
1809        );
1810        assert_eq!(
1811            DataTypeNode::Interval(IntervalType::Microsecond).to_string(),
1812            "IntervalMicrosecond"
1813        );
1814        assert_eq!(
1815            DataTypeNode::Interval(IntervalType::Millisecond).to_string(),
1816            "IntervalMillisecond"
1817        );
1818        assert_eq!(
1819            DataTypeNode::Interval(IntervalType::Second).to_string(),
1820            "IntervalSecond"
1821        );
1822        assert_eq!(
1823            DataTypeNode::Interval(IntervalType::Minute).to_string(),
1824            "IntervalMinute"
1825        );
1826        assert_eq!(
1827            DataTypeNode::Interval(IntervalType::Hour).to_string(),
1828            "IntervalHour"
1829        );
1830        assert_eq!(
1831            DataTypeNode::Interval(IntervalType::Day).to_string(),
1832            "IntervalDay"
1833        );
1834        assert_eq!(
1835            DataTypeNode::Interval(IntervalType::Week).to_string(),
1836            "IntervalWeek"
1837        );
1838        assert_eq!(
1839            DataTypeNode::Interval(IntervalType::Month).to_string(),
1840            "IntervalMonth"
1841        );
1842        assert_eq!(
1843            DataTypeNode::Interval(IntervalType::Quarter).to_string(),
1844            "IntervalQuarter"
1845        );
1846        assert_eq!(
1847            DataTypeNode::Interval(IntervalType::Year).to_string(),
1848            "IntervalYear"
1849        );
1850    }
1851
1852    #[test]
1853    fn test_data_type_node_into_string() {
1854        let data_type = DataTypeNode::new("Array(Int32)").unwrap();
1855        let data_type_string: String = data_type.into();
1856        assert_eq!(data_type_string, "Array(Int32)");
1857    }
1858
1859    #[test]
1860    fn test_data_type_to_string_geo() {
1861        assert_eq!(DataTypeNode::Point.to_string(), "Point");
1862        assert_eq!(DataTypeNode::Ring.to_string(), "Ring");
1863        assert_eq!(DataTypeNode::LineString.to_string(), "LineString");
1864        assert_eq!(DataTypeNode::Polygon.to_string(), "Polygon");
1865        assert_eq!(DataTypeNode::MultiLineString.to_string(), "MultiLineString");
1866        assert_eq!(DataTypeNode::MultiPolygon.to_string(), "MultiPolygon");
1867    }
1868
1869    #[test]
1870    fn test_display_column() {
1871        let column = Column::new(
1872            "col".to_string(),
1873            DataTypeNode::new("Array(Int32)").unwrap(),
1874        );
1875        assert_eq!(column.to_string(), "col: Array(Int32)");
1876    }
1877
1878    #[test]
1879    fn test_display_decimal_size() {
1880        assert_eq!(DecimalType::Decimal32.to_string(), "Decimal32");
1881        assert_eq!(DecimalType::Decimal64.to_string(), "Decimal64");
1882        assert_eq!(DecimalType::Decimal128.to_string(), "Decimal128");
1883        assert_eq!(DecimalType::Decimal256.to_string(), "Decimal256");
1884    }
1885
1886    #[test]
1887    fn test_time_time64_roundtrip_and_edges() {
1888        use super::DateTimePrecision::*;
1889
1890        // Valid "Time" type (no precision, no timezone)
1891        assert_eq!(DataTypeNode::new("Time").unwrap(), DataTypeNode::Time);
1892
1893        // "Time" should ignore timezones – they are parsed but discarded
1894        assert_eq!(
1895            DataTypeNode::new("Time('UTC')").unwrap(),
1896            DataTypeNode::Time
1897        );
1898        assert_eq!(
1899            DataTypeNode::new("Time('Europe/Moscow')").unwrap(),
1900            DataTypeNode::Time
1901        );
1902
1903        // Time64 with precision 0 (seconds)
1904        assert_eq!(
1905            DataTypeNode::new("Time64(0)").unwrap(),
1906            DataTypeNode::Time64(Precision0)
1907        );
1908
1909        // Time64 with precision 9 and a timezone (timezone ignored)
1910        assert_eq!(
1911            DataTypeNode::new("Time64(9, 'Europe/Amsterdam')").unwrap(),
1912            DataTypeNode::Time64(Precision9)
1913        );
1914
1915        // Time64 with precision 0 and timezone (again, timezone ignored)
1916        assert_eq!(
1917            DataTypeNode::new("Time64(0, 'UTC')").unwrap(),
1918            DataTypeNode::Time64(Precision0)
1919        );
1920
1921        // Time64 with precision 3 (milliseconds), no timezone
1922        assert_eq!(
1923            DataTypeNode::new("Time64(3)").unwrap(),
1924            DataTypeNode::Time64(Precision3)
1925        );
1926
1927        // Time64 with precision 6 (microseconds), timezone present but ignored
1928        assert_eq!(
1929            DataTypeNode::new("Time64(6, 'America/New_York')").unwrap(),
1930            DataTypeNode::Time64(Precision6)
1931        );
1932
1933        // Invalid: Empty argument list
1934        assert!(DataTypeNode::new("Time64()").is_err());
1935
1936        // Invalid: Non-numeric precision
1937        assert!(DataTypeNode::new("Time64(x)").is_err());
1938    }
1939
1940    const ENUM_WITH_ESCAPING_STR: &str =
1941        "Enum8('f\\'' = 1, 'x =' = 2, 'b\\'\\'' = 3, '\\'c=4=' = 42, '4' = 100)";
1942
1943    fn enum_with_escaping() -> DataTypeNode {
1944        DataTypeNode::Enum(
1945            EnumType::Enum8,
1946            HashMap::from([
1947                (1, "f\\'".to_string()),
1948                (2, "x =".to_string()),
1949                (3, "b\\'\\'".to_string()),
1950                (42, "\\'c=4=".to_string()),
1951                (100, "4".to_string()),
1952            ]),
1953        )
1954    }
1955
1956    #[test]
1957    fn simple_aggregate_function_min_uint32() {
1958        let dt = DataTypeNode::new("SimpleAggregateFunction(min, UInt32)").unwrap();
1959        match dt {
1960            DataTypeNode::SimpleAggregateFunction(func, inner) => {
1961                assert_eq!(func, "min");
1962                assert_eq!(*inner, DataTypeNode::UInt32);
1963            }
1964            other => panic!("expected SimpleAggregateFunction, got {other:?}"),
1965        }
1966    }
1967
1968    #[test]
1969    fn simple_aggregate_function_max_uint64() {
1970        let dt = DataTypeNode::new("SimpleAggregateFunction(max, UInt64)").unwrap();
1971        match dt {
1972            DataTypeNode::SimpleAggregateFunction(func, inner) => {
1973                assert_eq!(func, "max");
1974                assert_eq!(*inner, DataTypeNode::UInt64);
1975            }
1976            other => panic!("expected SimpleAggregateFunction, got {other:?}"),
1977        }
1978    }
1979
1980    #[test]
1981    fn simple_aggregate_function_sum_float64() {
1982        let dt = DataTypeNode::new("SimpleAggregateFunction(sum, Float64)").unwrap();
1983        match dt {
1984            DataTypeNode::SimpleAggregateFunction(func, inner) => {
1985                assert_eq!(func, "sum");
1986                assert_eq!(*inner, DataTypeNode::Float64);
1987            }
1988            other => panic!("expected SimpleAggregateFunction, got {other:?}"),
1989        }
1990    }
1991
1992    #[test]
1993    fn simple_aggregate_function_group_bit_and_uint8() {
1994        let dt = DataTypeNode::new("SimpleAggregateFunction(groupBitAnd, UInt8)").unwrap();
1995        match dt {
1996            DataTypeNode::SimpleAggregateFunction(func, inner) => {
1997                assert_eq!(func, "groupBitAnd");
1998                assert_eq!(*inner, DataTypeNode::UInt8);
1999            }
2000            other => panic!("expected SimpleAggregateFunction, got {other:?}"),
2001        }
2002    }
2003
2004    #[test]
2005    fn simple_aggregate_function_with_array_inner() {
2006        let dt =
2007            DataTypeNode::new("SimpleAggregateFunction(groupArrayArray, Array(UInt32))").unwrap();
2008        match dt {
2009            DataTypeNode::SimpleAggregateFunction(func, inner) => {
2010                assert_eq!(func, "groupArrayArray");
2011                assert_eq!(*inner, DataTypeNode::Array(Box::new(DataTypeNode::UInt32)));
2012            }
2013            other => panic!("expected SimpleAggregateFunction, got {other:?}"),
2014        }
2015    }
2016
2017    #[test]
2018    fn simple_aggregate_function_invalid_format() {
2019        let result = DataTypeNode::new("SimpleAggregateFunction(min)");
2020        assert!(result.is_err());
2021    }
2022
2023    #[test]
2024    fn simple_aggregate_function_display_roundtrip() {
2025        let input = "SimpleAggregateFunction(min, UInt32)";
2026        let dt = DataTypeNode::new(input).unwrap();
2027        assert_eq!(dt.to_string(), input);
2028
2029        let input2 = "SimpleAggregateFunction(groupArrayArray, Array(UInt32))";
2030        let dt2 = DataTypeNode::new(input2).unwrap();
2031        assert_eq!(dt2.to_string(), input2);
2032    }
2033
2034    #[test]
2035    fn simple_aggregate_function_remove() {
2036        let dt = DataTypeNode::new("SimpleAggregateFunction(min, UInt32)").unwrap();
2037        assert_eq!(*dt.remove_simple_aggregate_function(), DataTypeNode::UInt32);
2038
2039        // Non-SimpleAggregateFunction should return self
2040        let dt2 = DataTypeNode::UInt64;
2041        assert_eq!(
2042            *dt2.remove_simple_aggregate_function(),
2043            DataTypeNode::UInt64
2044        );
2045    }
2046}