1use crate::error::TypesError;
2use std::collections::HashMap;
3use std::fmt::{Display, Formatter};
4
/// A named column together with its parsed data type.
#[derive(Debug, Clone, PartialEq)]
pub struct Column {
    /// Name of the column.
    pub name: String,
    /// Data type of the column, represented as a [`DataTypeNode`] tree.
    pub data_type: DataTypeNode,
}
14
15impl Column {
16 #[allow(missing_docs)]
17 pub fn new(name: String, data_type: DataTypeNode) -> Self {
18 Self { name, data_type }
19 }
20}
21
22impl Display for Column {
23 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
24 write!(f, "{}: {}", self.name, self.data_type)
25 }
26}
27
/// A data type represented as a tree: leaf variants are plain types, while
/// wrapper variants (`Nullable`, `Array`, `Tuple`, ...) hold nested
/// [`DataTypeNode`]s.
///
/// Values are produced from a type name by [`DataTypeNode::new`] and rendered
/// back to text by the `Display` implementation.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
#[allow(missing_docs)]
pub enum DataTypeNode {
    Bool,

    UInt8,
    UInt16,
    UInt32,
    UInt64,
    UInt128,
    UInt256,

    Int8,
    Int16,
    Int32,
    Int64,
    Int128,
    Int256,

    Float32,
    Float64,
    BFloat16,

    /// Precision, scale, and the size class derived from the precision.
    Decimal(u8, u8, DecimalType),

    String,
    /// Fixed-length string; the payload is the `N` of `FixedString(N)`.
    FixedString(usize),
    UUID,

    Date,
    Date32,

    /// Optional timezone name, as written between the quotes of `DateTime('...')`.
    DateTime(Option<String>),
    /// Precision digit and optional timezone name.
    DateTime64(DateTimePrecision, Option<String>),

    Time,
    /// Precision digit.
    Time64(DateTimePrecision),

    /// Interval of the given unit; rendered as `Interval<Unit>`.
    Interval(IntervalType),

    IPv4,
    IPv6,

    Nullable(Box<DataTypeNode>),
    LowCardinality(Box<DataTypeNode>),

    Array(Box<DataTypeNode>),
    Tuple(Vec<DataTypeNode>),
    /// Enum width plus a map from numeric index to value name.
    Enum(EnumType, HashMap<i16, String>),

    /// Key type followed by value type.
    Map([Box<DataTypeNode>; 2]),

    /// Function name followed by its argument types.
    AggregateFunction(String, Vec<DataTypeNode>),

    /// The alternative types of the variant.
    Variant(Vec<DataTypeNode>),

    Dynamic,
    JSON,

    Point,
    Ring,
    LineString,
    MultiLineString,
    Polygon,
    MultiPolygon,
}
105
106impl DataTypeNode {
107 pub fn new(name: &str) -> Result<Self, TypesError> {
111 match name {
112 "UInt8" => Ok(Self::UInt8),
113 "UInt16" => Ok(Self::UInt16),
114 "UInt32" => Ok(Self::UInt32),
115 "UInt64" => Ok(Self::UInt64),
116 "UInt128" => Ok(Self::UInt128),
117 "UInt256" => Ok(Self::UInt256),
118 "Int8" => Ok(Self::Int8),
119 "Int16" => Ok(Self::Int16),
120 "Int32" => Ok(Self::Int32),
121 "Int64" => Ok(Self::Int64),
122 "Int128" => Ok(Self::Int128),
123 "Int256" => Ok(Self::Int256),
124 "Float32" => Ok(Self::Float32),
125 "Float64" => Ok(Self::Float64),
126 "BFloat16" => Ok(Self::BFloat16),
127 "String" => Ok(Self::String),
128 "UUID" => Ok(Self::UUID),
129 "Date" => Ok(Self::Date),
130 "Date32" => Ok(Self::Date32),
131 "IPv4" => Ok(Self::IPv4),
132 "IPv6" => Ok(Self::IPv6),
133 "Bool" => Ok(Self::Bool),
134 "Dynamic" => Ok(Self::Dynamic),
135 "JSON" => Ok(Self::JSON),
136 "Point" => Ok(Self::Point),
137 "Ring" => Ok(Self::Ring),
138 "LineString" => Ok(Self::LineString),
139 "MultiLineString" => Ok(Self::MultiLineString),
140 "Polygon" => Ok(Self::Polygon),
141 "MultiPolygon" => Ok(Self::MultiPolygon),
142
143 str if str.starts_with("JSON") => Ok(Self::JSON),
144
145 str if str.starts_with("Decimal") => parse_decimal(str),
146 str if str.starts_with("DateTime64") => parse_datetime64(str),
147 str if str.starts_with("DateTime") => parse_datetime(str),
148 str if str.starts_with("Time64") => parse_time64(str),
149 str if str.starts_with("Time") => Ok(Self::Time),
150 str if str.starts_with("Interval") => Ok(Self::Interval(str[8..].parse()?)),
151
152 str if str.starts_with("Nullable") => parse_nullable(str),
153 str if str.starts_with("LowCardinality") => parse_low_cardinality(str),
154 str if str.starts_with("FixedString") => parse_fixed_string(str),
155
156 str if str.starts_with("Array") => parse_array(str),
157 str if str.starts_with("Enum") => parse_enum(str),
158 str if str.starts_with("Map") => parse_map(str),
159 str if str.starts_with("Tuple") => parse_tuple(str),
160 str if str.starts_with("Variant") => parse_variant(str),
161
162 str => Err(TypesError::TypeParsingError(format!(
164 "Unknown data type: {str}"
165 ))),
166 }
167 }
168
169 pub fn remove_low_cardinality(&self) -> &DataTypeNode {
171 match self {
172 DataTypeNode::LowCardinality(inner) => inner,
173 _ => self,
174 }
175 }
176}
177
178impl From<DataTypeNode> for String {
179 fn from(value: DataTypeNode) -> Self {
180 value.to_string()
181 }
182}
183
184impl Display for DataTypeNode {
185 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
186 use DataTypeNode::*;
187 match self {
188 UInt8 => write!(f, "UInt8"),
189 UInt16 => write!(f, "UInt16"),
190 UInt32 => write!(f, "UInt32"),
191 UInt64 => write!(f, "UInt64"),
192 UInt128 => write!(f, "UInt128"),
193 UInt256 => write!(f, "UInt256"),
194 Int8 => write!(f, "Int8"),
195 Int16 => write!(f, "Int16"),
196 Int32 => write!(f, "Int32"),
197 Int64 => write!(f, "Int64"),
198 Int128 => write!(f, "Int128"),
199 Int256 => write!(f, "Int256"),
200 Float32 => write!(f, "Float32"),
201 Float64 => write!(f, "Float64"),
202 BFloat16 => write!(f, "BFloat16"),
203 Decimal(precision, scale, _) => {
204 write!(f, "Decimal({precision}, {scale})")
205 }
206 String => write!(f, "String"),
207 UUID => write!(f, "UUID"),
208 Date => write!(f, "Date"),
209 Date32 => write!(f, "Date32"),
210 DateTime(None) => write!(f, "DateTime"),
211 DateTime(Some(tz)) => write!(f, "DateTime('{tz}')"),
212 DateTime64(precision, None) => write!(f, "DateTime64({precision})"),
213 DateTime64(precision, Some(tz)) => write!(f, "DateTime64({precision}, '{tz}')"),
214 Time => write!(f, "Time"),
215 Time64(precision) => write!(f, "Time64({precision})"),
216 Interval(interval) => write!(f, "Interval{interval}"),
217 IPv4 => write!(f, "IPv4"),
218 IPv6 => write!(f, "IPv6"),
219 Bool => write!(f, "Bool"),
220 Nullable(inner) => write!(f, "Nullable({inner})"),
221 Array(inner) => write!(f, "Array({inner})"),
222 Tuple(elements) => {
223 write!(f, "Tuple(")?;
224 for (i, element) in elements.iter().enumerate() {
225 if i > 0 {
226 write!(f, ", ")?;
227 }
228 write!(f, "{element}")?;
229 }
230 write!(f, ")")
231 }
232 Map([key, value]) => {
233 write!(f, "Map({key}, {value})")
234 }
235 LowCardinality(inner) => {
236 write!(f, "LowCardinality({inner})")
237 }
238 Enum(enum_type, values) => {
239 let mut values_vec = values.iter().collect::<Vec<_>>();
240 values_vec.sort_by(|(i1, _), (i2, _)| (*i1).cmp(*i2));
241 write!(f, "{enum_type}(")?;
242 for (i, (index, name)) in values_vec.iter().enumerate() {
243 if i > 0 {
244 write!(f, ", ")?;
245 }
246 write!(f, "'{name}' = {index}")?;
247 }
248 write!(f, ")")
249 }
250 AggregateFunction(func_name, args) => {
251 write!(f, "AggregateFunction({func_name}, ")?;
252 for (i, element) in args.iter().enumerate() {
253 if i > 0 {
254 write!(f, ", ")?;
255 }
256 write!(f, "{element}")?;
257 }
258 write!(f, ")")
259 }
260 FixedString(size) => {
261 write!(f, "FixedString({size})")
262 }
263 Variant(types) => {
264 write!(f, "Variant(")?;
265 for (i, element) in types.iter().enumerate() {
266 if i > 0 {
267 write!(f, ", ")?;
268 }
269 write!(f, "{element}")?;
270 }
271 write!(f, ")")
272 }
273 JSON => write!(f, "JSON"),
274 Dynamic => write!(f, "Dynamic"),
275 Point => write!(f, "Point"),
276 Ring => write!(f, "Ring"),
277 LineString => write!(f, "LineString"),
278 MultiLineString => write!(f, "MultiLineString"),
279 Polygon => write!(f, "Polygon"),
280 MultiPolygon => write!(f, "MultiPolygon"),
281 }
282 }
283}
284
/// Width of an enum type, distinguishing `Enum8` from `Enum16`.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so values can be used as
/// keys in hashed collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum EnumType {
    /// The `Enum8` flavour.
    Enum8,
    /// The `Enum16` flavour.
    Enum16,
}
293
294impl Display for EnumType {
295 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
296 match self {
297 EnumType::Enum8 => write!(f, "Enum8"),
298 EnumType::Enum16 => write!(f, "Enum16"),
299 }
300 }
301}
302
/// Sub-second precision digit in the range `0..=9`, one variant per digit.
///
/// Used by `DateTime64` and `Time64`. Derives `Eq` and `Hash` in addition to
/// `PartialEq` so values can be used as keys in hashed collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[allow(missing_docs)]
pub enum DateTimePrecision {
    Precision0,
    Precision1,
    Precision2,
    Precision3,
    Precision4,
    Precision5,
    Precision6,
    Precision7,
    Precision8,
    Precision9,
}
320
321impl DateTimePrecision {
322 pub(crate) fn new(char: char) -> Result<DateTimePrecision, TypesError> {
323 match char {
324 '0' => Ok(DateTimePrecision::Precision0),
325 '1' => Ok(DateTimePrecision::Precision1),
326 '2' => Ok(DateTimePrecision::Precision2),
327 '3' => Ok(DateTimePrecision::Precision3),
328 '4' => Ok(DateTimePrecision::Precision4),
329 '5' => Ok(DateTimePrecision::Precision5),
330 '6' => Ok(DateTimePrecision::Precision6),
331 '7' => Ok(DateTimePrecision::Precision7),
332 '8' => Ok(DateTimePrecision::Precision8),
333 '9' => Ok(DateTimePrecision::Precision9),
334 _ => Err(TypesError::TypeParsingError(format!(
335 "Invalid DateTime64 precision, expected to be within [0, 9] interval, got {char}"
336 ))),
337 }
338 }
339}
340
/// Size class of a decimal, chosen from its precision by `DecimalType::new`.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so values can be used as
/// keys in hashed collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum DecimalType {
    /// Selected for precision up to 9.
    Decimal32,
    /// Selected for precision 10 through 18.
    Decimal64,
    /// Selected for precision 19 through 38.
    Decimal128,
    /// Selected for precision 39 through 76.
    Decimal256,
}
354
355impl Display for DecimalType {
356 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
357 match self {
358 DecimalType::Decimal32 => write!(f, "Decimal32"),
359 DecimalType::Decimal64 => write!(f, "Decimal64"),
360 DecimalType::Decimal128 => write!(f, "Decimal128"),
361 DecimalType::Decimal256 => write!(f, "Decimal256"),
362 }
363 }
364}
365
366impl DecimalType {
367 pub(crate) fn new(precision: u8) -> Result<Self, TypesError> {
368 if precision <= 9 {
369 Ok(DecimalType::Decimal32)
370 } else if precision <= 18 {
371 Ok(DecimalType::Decimal64)
372 } else if precision <= 38 {
373 Ok(DecimalType::Decimal128)
374 } else if precision <= 76 {
375 Ok(DecimalType::Decimal256)
376 } else {
377 Err(TypesError::TypeParsingError(format!(
378 "Invalid Decimal precision: {precision}"
379 )))
380 }
381 }
382}
383
384impl Display for DateTimePrecision {
385 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
386 match self {
387 DateTimePrecision::Precision0 => write!(f, "0"),
388 DateTimePrecision::Precision1 => write!(f, "1"),
389 DateTimePrecision::Precision2 => write!(f, "2"),
390 DateTimePrecision::Precision3 => write!(f, "3"),
391 DateTimePrecision::Precision4 => write!(f, "4"),
392 DateTimePrecision::Precision5 => write!(f, "5"),
393 DateTimePrecision::Precision6 => write!(f, "6"),
394 DateTimePrecision::Precision7 => write!(f, "7"),
395 DateTimePrecision::Precision8 => write!(f, "8"),
396 DateTimePrecision::Precision9 => write!(f, "9"),
397 }
398 }
399}
400
/// Unit of an interval type; the text after the `Interval` prefix in a type name.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so values can be used as
/// keys in hashed collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[allow(missing_docs)]
pub enum IntervalType {
    Nanosecond,
    Microsecond,
    Millisecond,
    Second,
    Minute,
    Hour,
    Day,
    Week,
    Month,
    Quarter,
    Year,
}
418
419impl std::str::FromStr for IntervalType {
420 type Err = TypesError;
421
422 fn from_str(s: &str) -> Result<Self, Self::Err> {
423 match s {
424 "Nanosecond" => Ok(IntervalType::Nanosecond),
425 "Microsecond" => Ok(IntervalType::Microsecond),
426 "Millisecond" => Ok(IntervalType::Millisecond),
427 "Second" => Ok(IntervalType::Second),
428 "Minute" => Ok(IntervalType::Minute),
429 "Hour" => Ok(IntervalType::Hour),
430 "Day" => Ok(IntervalType::Day),
431 "Week" => Ok(IntervalType::Week),
432 "Month" => Ok(IntervalType::Month),
433 "Quarter" => Ok(IntervalType::Quarter),
434 "Year" => Ok(IntervalType::Year),
435 _ => Err(TypesError::TypeParsingError(format!(
436 "Unknown interval type: {s}"
437 ))),
438 }
439 }
440}
441
442impl Display for IntervalType {
443 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
444 match self {
445 Self::Nanosecond => write!(f, "Nanosecond"),
446 Self::Microsecond => write!(f, "Microsecond"),
447 Self::Millisecond => write!(f, "Millisecond"),
448 Self::Second => write!(f, "Second"),
449 Self::Minute => write!(f, "Minute"),
450 Self::Hour => write!(f, "Hour"),
451 Self::Day => write!(f, "Day"),
452 Self::Week => write!(f, "Week"),
453 Self::Month => write!(f, "Month"),
454 Self::Quarter => write!(f, "Quarter"),
455 Self::Year => write!(f, "Year"),
456 }
457 }
458}
459
460fn parse_fixed_string(input: &str) -> Result<DataTypeNode, TypesError> {
461 if input.len() >= 14 {
462 let size_str = &input[12..input.len() - 1];
463 let size = size_str.parse::<usize>().map_err(|err| {
464 TypesError::TypeParsingError(format!(
465 "Invalid FixedString size, expected a valid number. Underlying error: {err}, input: {input}, size_str: {size_str}"
466 ))
467 })?;
468 if size == 0 {
469 return Err(TypesError::TypeParsingError(format!(
470 "Invalid FixedString size, expected a positive number, got zero. Input: {input}"
471 )));
472 }
473 return Ok(DataTypeNode::FixedString(size));
474 }
475 Err(TypesError::TypeParsingError(format!(
476 "Invalid FixedString format, expected FixedString(N), got {input}"
477 )))
478}
479
480fn parse_array(input: &str) -> Result<DataTypeNode, TypesError> {
481 if input.len() >= 8 {
482 let inner_type_str = &input[6..input.len() - 1];
483 let inner_type = DataTypeNode::new(inner_type_str)?;
484 return Ok(DataTypeNode::Array(Box::new(inner_type)));
485 }
486 Err(TypesError::TypeParsingError(format!(
487 "Invalid Array format, expected Array(InnerType), got {input}"
488 )))
489}
490
491fn parse_enum(input: &str) -> Result<DataTypeNode, TypesError> {
492 if input.len() >= 9 {
493 let (enum_type, prefix_len) = if input.starts_with("Enum8") {
494 (EnumType::Enum8, 6)
495 } else if input.starts_with("Enum16") {
496 (EnumType::Enum16, 7)
497 } else {
498 return Err(TypesError::TypeParsingError(format!(
499 "Invalid Enum type, expected Enum8 or Enum16, got {input}"
500 )));
501 };
502 let enum_values_map_str = &input[prefix_len..input.len() - 1];
503 let enum_values_map = parse_enum_values_map(enum_values_map_str)?;
504 return Ok(DataTypeNode::Enum(enum_type, enum_values_map));
505 }
506 Err(TypesError::TypeParsingError(format!(
507 "Invalid Enum format, expected Enum8('name' = value), got {input}"
508 )))
509}
510
511fn parse_datetime(input: &str) -> Result<DataTypeNode, TypesError> {
512 if input == "DateTime" {
513 return Ok(DataTypeNode::DateTime(None));
514 }
515 if input.len() >= 12 {
516 let timezone = input[10..input.len() - 2].to_string();
517 return Ok(DataTypeNode::DateTime(Some(timezone)));
518 }
519 Err(TypesError::TypeParsingError(format!(
520 "Invalid DateTime format, expected DateTime('timezone'), got {input}"
521 )))
522}
523
524fn parse_decimal(input: &str) -> Result<DataTypeNode, TypesError> {
525 if input.len() >= 10 {
526 let precision_and_scale_str = input[8..input.len() - 1].split(", ").collect::<Vec<_>>();
527 if precision_and_scale_str.len() != 2 {
528 return Err(TypesError::TypeParsingError(format!(
529 "Invalid Decimal format, expected Decimal(P, S), got {input}"
530 )));
531 }
532 let parsed = precision_and_scale_str
533 .iter()
534 .map(|s| s.parse::<u8>())
535 .collect::<Result<Vec<_>, _>>()
536 .map_err(|err| {
537 TypesError::TypeParsingError(format!(
538 "Invalid Decimal format, expected Decimal(P, S), got {input}. Underlying error: {err}"
539 ))
540 })?;
541 let precision = parsed[0];
542 let scale = parsed[1];
543 if scale < 1 || precision < 1 {
544 return Err(TypesError::TypeParsingError(format!(
545 "Invalid Decimal format, expected Decimal(P, S) with P > 0 and S > 0, got {input}"
546 )));
547 }
548 if precision < scale {
549 return Err(TypesError::TypeParsingError(format!(
550 "Invalid Decimal format, expected Decimal(P, S) with P >= S, got {input}"
551 )));
552 }
553 let size = DecimalType::new(parsed[0])?;
554 return Ok(DataTypeNode::Decimal(precision, scale, size));
555 }
556 Err(TypesError::TypeParsingError(format!(
557 "Invalid Decimal format, expected Decimal(P), got {input}"
558 )))
559}
560
561fn parse_datetime64(input: &str) -> Result<DataTypeNode, TypesError> {
562 if input.len() >= 13 {
563 let mut chars = input[11..input.len() - 1].chars();
564 let precision_char = chars.next().ok_or(TypesError::TypeParsingError(format!(
565 "Invalid DateTime64 precision, expected a positive number. Input: {input}"
566 )))?;
567 let precision = DateTimePrecision::new(precision_char)?;
568 let maybe_tz = match chars.as_str() {
569 str if str.len() > 2 => Some(str[3..str.len() - 1].to_string()),
570 _ => None,
571 };
572 return Ok(DataTypeNode::DateTime64(precision, maybe_tz));
573 }
574 Err(TypesError::TypeParsingError(format!(
575 "Invalid DateTime format, expected DateTime('timezone'), got {input}"
576 )))
577}
578
579fn parse_time64(input: &str) -> Result<DataTypeNode, TypesError> {
580 if input.len() >= 8 {
581 let mut chars = input[7..input.len() - 1].chars();
582 let precision_char = chars.next().ok_or(TypesError::TypeParsingError(format!(
583 "Invalid Time64 precision, expected a positive number. Input: {input}"
584 )))?;
585 let precision = DateTimePrecision::new(precision_char)?;
586
587 return Ok(DataTypeNode::Time64(precision));
588 }
589 Err(TypesError::TypeParsingError(format!(
590 "Invalid Time64 format, expected Time64(precision, 'timezone'), got {input}"
591 )))
592}
593
594fn parse_low_cardinality(input: &str) -> Result<DataTypeNode, TypesError> {
595 if input.len() >= 16 {
596 let inner_type_str = &input[15..input.len() - 1];
597 let inner_type = DataTypeNode::new(inner_type_str)?;
598 return Ok(DataTypeNode::LowCardinality(Box::new(inner_type)));
599 }
600 Err(TypesError::TypeParsingError(format!(
601 "Invalid LowCardinality format, expected LowCardinality(InnerType), got {input}"
602 )))
603}
604
605fn parse_nullable(input: &str) -> Result<DataTypeNode, TypesError> {
606 if input.len() >= 10 {
607 let inner_type_str = &input[9..input.len() - 1];
608 let inner_type = DataTypeNode::new(inner_type_str)?;
609 return Ok(DataTypeNode::Nullable(Box::new(inner_type)));
610 }
611 Err(TypesError::TypeParsingError(format!(
612 "Invalid Nullable format, expected Nullable(InnerType), got {input}"
613 )))
614}
615
616fn parse_map(input: &str) -> Result<DataTypeNode, TypesError> {
617 if input.len() >= 5 {
618 let inner_types_str = &input[4..input.len() - 1];
619 let inner_types = parse_inner_types(inner_types_str)?;
620 if inner_types.len() != 2 {
621 return Err(TypesError::TypeParsingError(format!(
622 "Expected two inner elements in a Map from input {input}"
623 )));
624 }
625 return Ok(DataTypeNode::Map([
626 Box::new(inner_types[0].clone()),
627 Box::new(inner_types[1].clone()),
628 ]));
629 }
630 Err(TypesError::TypeParsingError(format!(
631 "Invalid Map format, expected Map(KeyType, ValueType), got {input}"
632 )))
633}
634
635fn parse_tuple(input: &str) -> Result<DataTypeNode, TypesError> {
636 if input.len() > 7 {
637 let inner_types_str = &input[6..input.len() - 1];
638 let inner_types = parse_inner_types(inner_types_str)?;
639 if inner_types.is_empty() {
640 return Err(TypesError::TypeParsingError(format!(
641 "Expected at least one inner element in a Tuple from input {input}"
642 )));
643 }
644 return Ok(DataTypeNode::Tuple(inner_types));
645 }
646 Err(TypesError::TypeParsingError(format!(
647 "Invalid Tuple format, expected Tuple(Type1, Type2, ...), got {input}"
648 )))
649}
650
651fn parse_variant(input: &str) -> Result<DataTypeNode, TypesError> {
652 if input.len() >= 9 {
653 let inner_types_str = &input[8..input.len() - 1];
654 let inner_types = parse_inner_types(inner_types_str)?;
655 return Ok(DataTypeNode::Variant(inner_types));
656 }
657 Err(TypesError::TypeParsingError(format!(
658 "Invalid Variant format, expected Variant(Type1, Type2, ...), got {input}"
659 )))
660}
661
662fn parse_inner_types(input: &str) -> Result<Vec<DataTypeNode>, TypesError> {
669 let mut inner_types: Vec<DataTypeNode> = Vec::new();
670
671 let input_bytes = input.as_bytes();
672
673 let mut open_parens = 0;
674 let mut quote_open = false;
675 let mut char_escaped = false;
676 let mut last_element_index = 0;
677
678 let mut i = 0;
679 while i < input_bytes.len() {
680 if char_escaped {
681 char_escaped = false;
682 } else if input_bytes[i] == b'\\' {
683 char_escaped = true;
684 } else if input_bytes[i] == b'\'' {
685 quote_open = !quote_open; } else if !quote_open {
687 if input_bytes[i] == b'(' {
688 open_parens += 1;
689 } else if input_bytes[i] == b')' {
690 open_parens -= 1;
691 } else if input_bytes[i] == b',' && open_parens == 0 {
692 let data_type_str = String::from_utf8(input_bytes[last_element_index..i].to_vec())
693 .map_err(|_| {
694 TypesError::TypeParsingError(format!(
695 "Invalid UTF-8 sequence in input for the inner data type: {}",
696 &input[last_element_index..]
697 ))
698 })?;
699 let data_type = DataTypeNode::new(&data_type_str)?;
700 inner_types.push(data_type);
701 if i + 2 <= input_bytes.len() && input_bytes[i + 1] == b' ' {
703 i += 2;
704 } else {
705 i += 1;
706 }
707 last_element_index = i;
708 continue; }
710 }
711 i += 1;
712 }
713
714 if open_parens == 0 && last_element_index < input_bytes.len() {
716 let data_type_str =
717 String::from_utf8(input_bytes[last_element_index..].to_vec()).map_err(|_| {
718 TypesError::TypeParsingError(format!(
719 "Invalid UTF-8 sequence in input for the inner data type: {}",
720 &input[last_element_index..]
721 ))
722 })?;
723 let data_type = DataTypeNode::new(&data_type_str)?;
724 inner_types.push(data_type);
725 }
726
727 Ok(inner_types)
728}
729
730#[inline]
731fn parse_enum_index(input_bytes: &[u8], input: &str) -> Result<i16, TypesError> {
732 String::from_utf8(input_bytes.to_vec())
733 .map_err(|_| {
734 TypesError::TypeParsingError(format!(
735 "Invalid UTF-8 sequence in input for the enum index: {}",
736 &input
737 ))
738 })?
739 .parse::<i16>()
740 .map_err(|_| {
741 TypesError::TypeParsingError(format!(
742 "Invalid Enum index, expected a valid number. Input: {input}"
743 ))
744 })
745}
746
/// Parses the text between the parentheses of an enum type, e.g.
/// `'a' = 1, 'b' = 2`, into a map from numeric index to name.
///
/// The scanner alternates between two states: reading a quoted name and
/// reading the index that follows it. Names are stored exactly as written
/// between the quotes, so any backslash escapes stay in the stored name.
///
/// NOTE(review): the scanner advances over the separators by fixed offsets and
/// does not check that they are literally ` = ` and `, '`. It also skips the
/// first byte on the assumption that it is the opening quote. Confirm that
/// callers only pass well-formed lists. `input` must not be empty, because the
/// slice taken after the loop starts at offset 1.
///
/// # Errors
///
/// Returns [`TypesError::TypeParsingError`] when a name is not followed by
/// enough bytes for ` = ` plus an index, when an index is not a valid `i16`,
/// or when the number of names differs from the number of indices.
fn parse_enum_values_map(input: &str) -> Result<HashMap<i16, String>, TypesError> {
    let mut names: Vec<String> = Vec::new();
    let mut indices: Vec<i16> = Vec::new();
    // true while scanning a quoted name, false while scanning its index
    let mut parsing_name = true;
    // true when the previous byte inside a name was a backslash
    let mut char_escaped = false;
    // both offsets start at 1 to skip the first byte (assumed to be the opening quote)
    let mut start_index = 1;
    let mut i = 1;
    let input_bytes = input.as_bytes();
    while i < input_bytes.len() {
        if parsing_name {
            if char_escaped {
                // the escaped byte belongs to the name, whatever it is
                char_escaped = false;
            } else if input_bytes[i] == b'\\' {
                char_escaped = true;
            } else if input_bytes[i] == b'\'' {
                // unescaped closing quote: the name is everything since start_index
                let name_bytes = &input_bytes[start_index..i];
                let name = String::from_utf8(name_bytes.to_vec()).map_err(|_| {
                    TypesError::TypeParsingError(format!(
                        "Invalid UTF-8 sequence in input for the enum name: {}",
                        &input[start_index..i]
                    ))
                })?;
                names.push(name);

                // the closing quote must be followed by at least 4 more bytes
                // (presumably ` = ` and one index character)
                if i + 4 >= input_bytes.len() {
                    return Err(TypesError::TypeParsingError(format!(
                        "Invalid Enum format - expected ` = ` after name, input: {input}",
                    )));
                }
                // Jump to the first index character. Together with the `i += 1`
                // at the end of the loop body, the first index character is
                // never tested by the digit check below, which is what lets a
                // leading `-` through.
                i += 4;
                start_index = i;
                parsing_name = false;
            }
        }
        // any non-digit byte terminates the index
        else if input_bytes[i] < b'0' || input_bytes[i] > b'9' {
            let index = parse_enum_index(&input_bytes[start_index..i], input)?;
            indices.push(index);

            if i + 2 >= input_bytes.len() {
                // NOTE(review): `start_index` and `i` are unchanged here, so
                // the statement after the loop parses and pushes this same
                // index a second time; the length check below then reports an
                // error. Confirm this is the intended way to reject trailing
                // garbage.
                break;
            }
            // skip the terminator and the following byte (presumably `, `);
            // with the `i += 1` below, scanning resumes just after the byte
            // assumed to be the next opening quote
            i += 2;
            start_index = i + 1;
            parsing_name = true;
            char_escaped = false;
        }

        i += 1;
    }

    // the last index runs to the end of the input and has no terminator
    let index = parse_enum_index(&input_bytes[start_index..i], input)?;
    indices.push(index);

    if names.len() != indices.len() {
        return Err(TypesError::TypeParsingError(format!(
            "Invalid Enum format - expected the same number of names and indices, got names: {}, indices: {}",
            names.join(", "),
            indices
                .iter()
                .map(|index| index.to_string())
                .collect::<Vec<String>>()
                .join(", "),
        )));
    }

    // pair each index with its name; a repeated index keeps the last name seen
    Ok(indices
        .into_iter()
        .zip(names)
        .collect::<HashMap<i16, String>>())
}
823
824#[cfg(test)]
825mod tests {
826 use super::*;
827
828 #[test]
829 fn test_aggregate_function_display() {
830 let simple = DataTypeNode::AggregateFunction("sum".to_string(), vec![DataTypeNode::UInt64]);
831 assert_eq!(simple.to_string(), "AggregateFunction(sum, UInt64)");
832
833 let complex = DataTypeNode::AggregateFunction(
834 "groupArray".to_string(),
835 vec![
836 DataTypeNode::String,
837 DataTypeNode::UInt32,
838 DataTypeNode::Nullable(Box::new(DataTypeNode::Float64)),
839 ],
840 );
841 assert_eq!(
842 complex.to_string(),
843 "AggregateFunction(groupArray, String, UInt32, Nullable(Float64))"
844 );
845 }
846
847 #[test]
848 fn test_tuple_display() {
849 let empty = DataTypeNode::Tuple(vec![]);
850 assert_eq!(empty.to_string(), "Tuple()");
851
852 let single = DataTypeNode::Tuple(vec![DataTypeNode::String]);
853 assert_eq!(single.to_string(), "Tuple(String)");
854
855 let multiple = DataTypeNode::Tuple(vec![
856 DataTypeNode::UInt64,
857 DataTypeNode::String,
858 DataTypeNode::DateTime(None),
859 DataTypeNode::Array(Box::new(DataTypeNode::Int32)),
860 ]);
861 assert_eq!(
862 multiple.to_string(),
863 "Tuple(UInt64, String, DateTime, Array(Int32))"
864 );
865 }
866
867 #[test]
868 fn test_enum_display() {
869 let mut values1 = HashMap::new();
870 values1.insert(1, "one".to_string());
871 values1.insert(2, "two".to_string());
872 values1.insert(3, "three".to_string());
873
874 let simple_enum = DataTypeNode::Enum(EnumType::Enum8, values1);
875 assert_eq!(
876 simple_enum.to_string(),
877 "Enum8('one' = 1, 'two' = 2, 'three' = 3)"
878 );
879
880 let mut values2 = HashMap::new();
882 values2.insert(10, "ten".to_string());
883 values2.insert(1, "one".to_string());
884 values2.insert(5, "five".to_string());
885
886 let ordered_enum = DataTypeNode::Enum(EnumType::Enum16, values2);
887 assert_eq!(
888 ordered_enum.to_string(),
889 "Enum16('one' = 1, 'five' = 5, 'ten' = 10)"
890 );
891 }
892
893 #[test]
894 fn test_variant_display() {
895 let empty = DataTypeNode::Variant(vec![]);
897 assert_eq!(empty.to_string(), "Variant()");
898
899 let single = DataTypeNode::Variant(vec![DataTypeNode::String]);
901 assert_eq!(single.to_string(), "Variant(String)");
902
903 let multiple = DataTypeNode::Variant(vec![
905 DataTypeNode::UInt64,
906 DataTypeNode::String,
907 DataTypeNode::Nullable(Box::new(DataTypeNode::DateTime(None))),
908 DataTypeNode::Array(Box::new(DataTypeNode::Int32)),
909 ]);
910 assert_eq!(
911 multiple.to_string(),
912 "Variant(UInt64, String, Nullable(DateTime), Array(Int32))"
913 );
914
915 let nested = DataTypeNode::Variant(vec![
917 DataTypeNode::Tuple(vec![DataTypeNode::String, DataTypeNode::UInt64]),
918 DataTypeNode::Map([
919 Box::new(DataTypeNode::String),
920 Box::new(DataTypeNode::Int32),
921 ]),
922 ]);
923 assert_eq!(
924 nested.to_string(),
925 "Variant(Tuple(String, UInt64), Map(String, Int32))"
926 );
927 }
928
929 #[test]
930 fn test_data_type_new_simple() {
931 assert_eq!(DataTypeNode::new("UInt8").unwrap(), DataTypeNode::UInt8);
932 assert_eq!(DataTypeNode::new("UInt16").unwrap(), DataTypeNode::UInt16);
933 assert_eq!(DataTypeNode::new("UInt32").unwrap(), DataTypeNode::UInt32);
934 assert_eq!(DataTypeNode::new("UInt64").unwrap(), DataTypeNode::UInt64);
935 assert_eq!(DataTypeNode::new("UInt128").unwrap(), DataTypeNode::UInt128);
936 assert_eq!(DataTypeNode::new("UInt256").unwrap(), DataTypeNode::UInt256);
937 assert_eq!(DataTypeNode::new("Int8").unwrap(), DataTypeNode::Int8);
938 assert_eq!(DataTypeNode::new("Int16").unwrap(), DataTypeNode::Int16);
939 assert_eq!(DataTypeNode::new("Int32").unwrap(), DataTypeNode::Int32);
940 assert_eq!(DataTypeNode::new("Int64").unwrap(), DataTypeNode::Int64);
941 assert_eq!(DataTypeNode::new("Int128").unwrap(), DataTypeNode::Int128);
942 assert_eq!(DataTypeNode::new("Int256").unwrap(), DataTypeNode::Int256);
943 assert_eq!(DataTypeNode::new("Float32").unwrap(), DataTypeNode::Float32);
944 assert_eq!(DataTypeNode::new("Float64").unwrap(), DataTypeNode::Float64);
945 assert_eq!(
946 DataTypeNode::new("BFloat16").unwrap(),
947 DataTypeNode::BFloat16
948 );
949 assert_eq!(DataTypeNode::new("String").unwrap(), DataTypeNode::String);
950 assert_eq!(DataTypeNode::new("UUID").unwrap(), DataTypeNode::UUID);
951 assert_eq!(DataTypeNode::new("Date").unwrap(), DataTypeNode::Date);
952 assert_eq!(DataTypeNode::new("Date32").unwrap(), DataTypeNode::Date32);
953 assert_eq!(DataTypeNode::new("IPv4").unwrap(), DataTypeNode::IPv4);
954 assert_eq!(DataTypeNode::new("IPv6").unwrap(), DataTypeNode::IPv6);
955 assert_eq!(DataTypeNode::new("Bool").unwrap(), DataTypeNode::Bool);
956 assert_eq!(DataTypeNode::new("Dynamic").unwrap(), DataTypeNode::Dynamic);
957 assert_eq!(DataTypeNode::new("JSON").unwrap(), DataTypeNode::JSON);
958 assert_eq!(
959 DataTypeNode::new("JSON(max_dynamic_types=8, max_dynamic_paths=64)").unwrap(),
960 DataTypeNode::JSON
961 );
962 assert!(DataTypeNode::new("SomeUnknownType").is_err());
963 }
964
965 #[test]
966 fn test_data_type_new_fixed_string() {
967 assert_eq!(
968 DataTypeNode::new("FixedString(1)").unwrap(),
969 DataTypeNode::FixedString(1)
970 );
971 assert_eq!(
972 DataTypeNode::new("FixedString(16)").unwrap(),
973 DataTypeNode::FixedString(16)
974 );
975 assert_eq!(
976 DataTypeNode::new("FixedString(255)").unwrap(),
977 DataTypeNode::FixedString(255)
978 );
979 assert_eq!(
980 DataTypeNode::new("FixedString(65535)").unwrap(),
981 DataTypeNode::FixedString(65_535)
982 );
983 assert!(DataTypeNode::new("FixedString()").is_err());
984 assert!(DataTypeNode::new("FixedString(0)").is_err());
985 assert!(DataTypeNode::new("FixedString(-1)").is_err());
986 assert!(DataTypeNode::new("FixedString(abc)").is_err());
987 }
988
989 #[test]
990 fn test_data_type_new_array() {
991 assert_eq!(
992 DataTypeNode::new("Array(UInt8)").unwrap(),
993 DataTypeNode::Array(Box::new(DataTypeNode::UInt8))
994 );
995 assert_eq!(
996 DataTypeNode::new("Array(String)").unwrap(),
997 DataTypeNode::Array(Box::new(DataTypeNode::String))
998 );
999 assert_eq!(
1000 DataTypeNode::new("Array(FixedString(16))").unwrap(),
1001 DataTypeNode::Array(Box::new(DataTypeNode::FixedString(16)))
1002 );
1003 assert_eq!(
1004 DataTypeNode::new("Array(Nullable(Int32))").unwrap(),
1005 DataTypeNode::Array(Box::new(DataTypeNode::Nullable(Box::new(
1006 DataTypeNode::Int32
1007 ))))
1008 );
1009 assert!(DataTypeNode::new("Array()").is_err());
1010 assert!(DataTypeNode::new("Array(abc)").is_err());
1011 }
1012
1013 #[test]
1014 fn test_data_type_new_decimal() {
1015 assert_eq!(
1016 DataTypeNode::new("Decimal(7, 2)").unwrap(),
1017 DataTypeNode::Decimal(7, 2, DecimalType::Decimal32)
1018 );
1019 assert_eq!(
1020 DataTypeNode::new("Decimal(12, 4)").unwrap(),
1021 DataTypeNode::Decimal(12, 4, DecimalType::Decimal64)
1022 );
1023 assert_eq!(
1024 DataTypeNode::new("Decimal(27, 6)").unwrap(),
1025 DataTypeNode::Decimal(27, 6, DecimalType::Decimal128)
1026 );
1027 assert_eq!(
1028 DataTypeNode::new("Decimal(42, 8)").unwrap(),
1029 DataTypeNode::Decimal(42, 8, DecimalType::Decimal256)
1030 );
1031 assert!(DataTypeNode::new("Decimal").is_err());
1032 assert!(DataTypeNode::new("Decimal(").is_err());
1033 assert!(DataTypeNode::new("Decimal()").is_err());
1034 assert!(DataTypeNode::new("Decimal(1)").is_err());
1035 assert!(DataTypeNode::new("Decimal(1,)").is_err());
1036 assert!(DataTypeNode::new("Decimal(1, )").is_err());
1037 assert!(DataTypeNode::new("Decimal(0, 0)").is_err()); assert!(DataTypeNode::new("Decimal(x, 0)").is_err()); assert!(DataTypeNode::new("Decimal(', ')").is_err());
1040 assert!(DataTypeNode::new("Decimal(77, 1)").is_err()); assert!(DataTypeNode::new("Decimal(1, 2)").is_err()); assert!(DataTypeNode::new("Decimal(1, x)").is_err()); assert!(DataTypeNode::new("Decimal(42, ,)").is_err());
1044 assert!(DataTypeNode::new("Decimal(42, ')").is_err());
1045 assert!(DataTypeNode::new("Decimal(foobar)").is_err());
1046 }
1047
1048 #[test]
1049 fn test_data_type_new_datetime() {
1050 assert_eq!(
1051 DataTypeNode::new("DateTime").unwrap(),
1052 DataTypeNode::DateTime(None)
1053 );
1054 assert_eq!(
1055 DataTypeNode::new("DateTime('UTC')").unwrap(),
1056 DataTypeNode::DateTime(Some("UTC".to_string()))
1057 );
1058 assert_eq!(
1059 DataTypeNode::new("DateTime('America/New_York')").unwrap(),
1060 DataTypeNode::DateTime(Some("America/New_York".to_string()))
1061 );
1062 assert!(DataTypeNode::new("DateTime()").is_err());
1063 }
1064
1065 #[test]
1066 fn test_data_type_new_datetime64() {
1067 assert_eq!(
1068 DataTypeNode::new("DateTime64(0)").unwrap(),
1069 DataTypeNode::DateTime64(DateTimePrecision::Precision0, None)
1070 );
1071 assert_eq!(
1072 DataTypeNode::new("DateTime64(1)").unwrap(),
1073 DataTypeNode::DateTime64(DateTimePrecision::Precision1, None)
1074 );
1075 assert_eq!(
1076 DataTypeNode::new("DateTime64(2)").unwrap(),
1077 DataTypeNode::DateTime64(DateTimePrecision::Precision2, None)
1078 );
1079 assert_eq!(
1080 DataTypeNode::new("DateTime64(3)").unwrap(),
1081 DataTypeNode::DateTime64(DateTimePrecision::Precision3, None)
1082 );
1083 assert_eq!(
1084 DataTypeNode::new("DateTime64(4)").unwrap(),
1085 DataTypeNode::DateTime64(DateTimePrecision::Precision4, None)
1086 );
1087 assert_eq!(
1088 DataTypeNode::new("DateTime64(5)").unwrap(),
1089 DataTypeNode::DateTime64(DateTimePrecision::Precision5, None)
1090 );
1091 assert_eq!(
1092 DataTypeNode::new("DateTime64(6)").unwrap(),
1093 DataTypeNode::DateTime64(DateTimePrecision::Precision6, None)
1094 );
1095 assert_eq!(
1096 DataTypeNode::new("DateTime64(7)").unwrap(),
1097 DataTypeNode::DateTime64(DateTimePrecision::Precision7, None)
1098 );
1099 assert_eq!(
1100 DataTypeNode::new("DateTime64(8)").unwrap(),
1101 DataTypeNode::DateTime64(DateTimePrecision::Precision8, None)
1102 );
1103 assert_eq!(
1104 DataTypeNode::new("DateTime64(9)").unwrap(),
1105 DataTypeNode::DateTime64(DateTimePrecision::Precision9, None)
1106 );
1107 assert_eq!(
1108 DataTypeNode::new("DateTime64(0, 'UTC')").unwrap(),
1109 DataTypeNode::DateTime64(DateTimePrecision::Precision0, Some("UTC".to_string()))
1110 );
1111 assert_eq!(
1112 DataTypeNode::new("DateTime64(3, 'America/New_York')").unwrap(),
1113 DataTypeNode::DateTime64(
1114 DateTimePrecision::Precision3,
1115 Some("America/New_York".to_string())
1116 )
1117 );
1118 assert_eq!(
1119 DataTypeNode::new("DateTime64(6, 'America/New_York')").unwrap(),
1120 DataTypeNode::DateTime64(
1121 DateTimePrecision::Precision6,
1122 Some("America/New_York".to_string())
1123 )
1124 );
1125 assert_eq!(
1126 DataTypeNode::new("DateTime64(9, 'Europe/Amsterdam')").unwrap(),
1127 DataTypeNode::DateTime64(
1128 DateTimePrecision::Precision9,
1129 Some("Europe/Amsterdam".to_string())
1130 )
1131 );
1132 assert!(DataTypeNode::new("DateTime64()").is_err());
1133 assert!(DataTypeNode::new("DateTime64(x)").is_err());
1134 }
1135
1136 #[test]
1137 fn test_data_type_new_time() {
1138 assert_eq!(DataTypeNode::new("Time").unwrap(), DataTypeNode::Time);
1139 assert_eq!(
1140 DataTypeNode::new("Time('UTC')").unwrap(),
1141 DataTypeNode::Time
1142 );
1143 assert_eq!(
1144 DataTypeNode::new("Time('America/New_York')").unwrap(),
1145 DataTypeNode::Time
1146 );
1147 assert_eq!(DataTypeNode::new("Time()").unwrap(), DataTypeNode::Time);
1148 }
1149
1150 #[test]
1151 fn test_data_type_new_time64() {
1152 assert_eq!(
1153 DataTypeNode::new("Time64(0)").unwrap(),
1154 DataTypeNode::Time64(DateTimePrecision::Precision0)
1155 );
1156 assert_eq!(
1157 DataTypeNode::new("Time64(1)").unwrap(),
1158 DataTypeNode::Time64(DateTimePrecision::Precision1)
1159 );
1160 assert_eq!(
1161 DataTypeNode::new("Time64(2)").unwrap(),
1162 DataTypeNode::Time64(DateTimePrecision::Precision2)
1163 );
1164 assert_eq!(
1165 DataTypeNode::new("Time64(3)").unwrap(),
1166 DataTypeNode::Time64(DateTimePrecision::Precision3)
1167 );
1168 assert_eq!(
1169 DataTypeNode::new("Time64(4)").unwrap(),
1170 DataTypeNode::Time64(DateTimePrecision::Precision4)
1171 );
1172 assert_eq!(
1173 DataTypeNode::new("Time64(5)").unwrap(),
1174 DataTypeNode::Time64(DateTimePrecision::Precision5)
1175 );
1176 assert_eq!(
1177 DataTypeNode::new("Time64(6)").unwrap(),
1178 DataTypeNode::Time64(DateTimePrecision::Precision6)
1179 );
1180 assert_eq!(
1181 DataTypeNode::new("Time64(7)").unwrap(),
1182 DataTypeNode::Time64(DateTimePrecision::Precision7)
1183 );
1184 assert_eq!(
1185 DataTypeNode::new("Time64(8)").unwrap(),
1186 DataTypeNode::Time64(DateTimePrecision::Precision8)
1187 );
1188 assert_eq!(
1189 DataTypeNode::new("Time64(9)").unwrap(),
1190 DataTypeNode::Time64(DateTimePrecision::Precision9)
1191 );
1192 assert_eq!(
1193 DataTypeNode::new("Time64(0, 'UTC')").unwrap(),
1194 DataTypeNode::Time64(DateTimePrecision::Precision0)
1195 );
1196 assert_eq!(
1197 DataTypeNode::new("Time64(3, 'America/New_York')").unwrap(),
1198 DataTypeNode::Time64(DateTimePrecision::Precision3)
1199 );
1200 assert_eq!(
1201 DataTypeNode::new("Time64(6, 'America/New_York')").unwrap(),
1202 DataTypeNode::Time64(DateTimePrecision::Precision6)
1203 );
1204 assert_eq!(
1205 DataTypeNode::new("Time64(9, 'Europe/Amsterdam')").unwrap(),
1206 DataTypeNode::Time64(DateTimePrecision::Precision9)
1207 );
1208 assert!(DataTypeNode::new("Time64()").is_err());
1209 assert!(DataTypeNode::new("Time64(x)").is_err());
1210 }
1211
1212 #[test]
1213 fn test_data_type_new_interval() {
1214 assert_eq!(
1215 DataTypeNode::new("IntervalNanosecond").unwrap(),
1216 DataTypeNode::Interval(IntervalType::Nanosecond)
1217 );
1218 assert_eq!(
1219 DataTypeNode::new("IntervalMicrosecond").unwrap(),
1220 DataTypeNode::Interval(IntervalType::Microsecond)
1221 );
1222 assert_eq!(
1223 DataTypeNode::new("IntervalMillisecond").unwrap(),
1224 DataTypeNode::Interval(IntervalType::Millisecond)
1225 );
1226 assert_eq!(
1227 DataTypeNode::new("IntervalSecond").unwrap(),
1228 DataTypeNode::Interval(IntervalType::Second)
1229 );
1230 assert_eq!(
1231 DataTypeNode::new("IntervalMinute").unwrap(),
1232 DataTypeNode::Interval(IntervalType::Minute)
1233 );
1234 assert_eq!(
1235 DataTypeNode::new("IntervalHour").unwrap(),
1236 DataTypeNode::Interval(IntervalType::Hour)
1237 );
1238 assert_eq!(
1239 DataTypeNode::new("IntervalDay").unwrap(),
1240 DataTypeNode::Interval(IntervalType::Day)
1241 );
1242 assert_eq!(
1243 DataTypeNode::new("IntervalWeek").unwrap(),
1244 DataTypeNode::Interval(IntervalType::Week)
1245 );
1246 assert_eq!(
1247 DataTypeNode::new("IntervalMonth").unwrap(),
1248 DataTypeNode::Interval(IntervalType::Month)
1249 );
1250 assert_eq!(
1251 DataTypeNode::new("IntervalQuarter").unwrap(),
1252 DataTypeNode::Interval(IntervalType::Quarter)
1253 );
1254 assert_eq!(
1255 DataTypeNode::new("IntervalYear").unwrap(),
1256 DataTypeNode::Interval(IntervalType::Year)
1257 );
1258 }
1259
1260 #[test]
1261 fn test_data_type_new_low_cardinality() {
1262 assert_eq!(
1263 DataTypeNode::new("LowCardinality(UInt8)").unwrap(),
1264 DataTypeNode::LowCardinality(Box::new(DataTypeNode::UInt8))
1265 );
1266 assert_eq!(
1267 DataTypeNode::new("LowCardinality(String)").unwrap(),
1268 DataTypeNode::LowCardinality(Box::new(DataTypeNode::String))
1269 );
1270 assert_eq!(
1271 DataTypeNode::new("LowCardinality(Array(Int32))").unwrap(),
1272 DataTypeNode::LowCardinality(Box::new(DataTypeNode::Array(Box::new(
1273 DataTypeNode::Int32
1274 ))))
1275 );
1276 assert_eq!(
1277 DataTypeNode::new("LowCardinality(Nullable(Int32))").unwrap(),
1278 DataTypeNode::LowCardinality(Box::new(DataTypeNode::Nullable(Box::new(
1279 DataTypeNode::Int32
1280 ))))
1281 );
1282 assert!(DataTypeNode::new("LowCardinality").is_err());
1283 assert!(DataTypeNode::new("LowCardinality()").is_err());
1284 assert!(DataTypeNode::new("LowCardinality(X)").is_err());
1285 }
1286
1287 #[test]
1288 fn test_data_type_new_nullable() {
1289 assert_eq!(
1290 DataTypeNode::new("Nullable(UInt8)").unwrap(),
1291 DataTypeNode::Nullable(Box::new(DataTypeNode::UInt8))
1292 );
1293 assert_eq!(
1294 DataTypeNode::new("Nullable(String)").unwrap(),
1295 DataTypeNode::Nullable(Box::new(DataTypeNode::String))
1296 );
1297 assert!(DataTypeNode::new("Nullable").is_err());
1298 assert!(DataTypeNode::new("Nullable()").is_err());
1299 assert!(DataTypeNode::new("Nullable(X)").is_err());
1300 }
1301
1302 #[test]
1303 fn test_data_type_new_map() {
1304 assert_eq!(
1305 DataTypeNode::new("Map(UInt8, String)").unwrap(),
1306 DataTypeNode::Map([
1307 Box::new(DataTypeNode::UInt8),
1308 Box::new(DataTypeNode::String)
1309 ])
1310 );
1311 assert_eq!(
1312 DataTypeNode::new("Map(String, Int32)").unwrap(),
1313 DataTypeNode::Map([
1314 Box::new(DataTypeNode::String),
1315 Box::new(DataTypeNode::Int32)
1316 ])
1317 );
1318 assert_eq!(
1319 DataTypeNode::new("Map(String, Map(Int32, Array(Nullable(String))))").unwrap(),
1320 DataTypeNode::Map([
1321 Box::new(DataTypeNode::String),
1322 Box::new(DataTypeNode::Map([
1323 Box::new(DataTypeNode::Int32),
1324 Box::new(DataTypeNode::Array(Box::new(DataTypeNode::Nullable(
1325 Box::new(DataTypeNode::String)
1326 ))))
1327 ]))
1328 ])
1329 );
1330 assert!(DataTypeNode::new("Map()").is_err());
1331 assert!(DataTypeNode::new("Map").is_err());
1332 assert!(DataTypeNode::new("Map(K)").is_err());
1333 assert!(DataTypeNode::new("Map(K, V)").is_err());
1334 assert!(DataTypeNode::new("Map(Int32, V)").is_err());
1335 assert!(DataTypeNode::new("Map(K, Int32)").is_err());
1336 assert!(DataTypeNode::new("Map(String, Int32").is_err());
1337 }
1338
1339 #[test]
1340 fn test_data_type_new_variant() {
1341 assert_eq!(
1342 DataTypeNode::new("Variant(UInt8, String)").unwrap(),
1343 DataTypeNode::Variant(vec![DataTypeNode::UInt8, DataTypeNode::String])
1344 );
1345 assert_eq!(
1346 DataTypeNode::new("Variant(String, Int32)").unwrap(),
1347 DataTypeNode::Variant(vec![DataTypeNode::String, DataTypeNode::Int32])
1348 );
1349 assert_eq!(
1350 DataTypeNode::new("Variant(Int32, Array(Nullable(String)), Map(Int32, String))")
1351 .unwrap(),
1352 DataTypeNode::Variant(vec![
1353 DataTypeNode::Int32,
1354 DataTypeNode::Array(Box::new(DataTypeNode::Nullable(Box::new(
1355 DataTypeNode::String
1356 )))),
1357 DataTypeNode::Map([
1358 Box::new(DataTypeNode::Int32),
1359 Box::new(DataTypeNode::String)
1360 ])
1361 ])
1362 );
1363 assert!(DataTypeNode::new("Variant").is_err());
1364 }
1365
1366 #[test]
1367 fn test_data_type_new_tuple() {
1368 assert_eq!(
1369 DataTypeNode::new("Tuple(UInt8, String)").unwrap(),
1370 DataTypeNode::Tuple(vec![DataTypeNode::UInt8, DataTypeNode::String])
1371 );
1372 assert_eq!(
1373 DataTypeNode::new("Tuple(String, Int32)").unwrap(),
1374 DataTypeNode::Tuple(vec![DataTypeNode::String, DataTypeNode::Int32])
1375 );
1376 assert_eq!(
1377 DataTypeNode::new("Tuple(Bool,Int32)").unwrap(),
1378 DataTypeNode::Tuple(vec![DataTypeNode::Bool, DataTypeNode::Int32])
1379 );
1380 assert_eq!(
1381 DataTypeNode::new(
1382 "Tuple(Int32, Array(Nullable(String)), Map(Int32, Tuple(String, Array(UInt8))))"
1383 )
1384 .unwrap(),
1385 DataTypeNode::Tuple(vec![
1386 DataTypeNode::Int32,
1387 DataTypeNode::Array(Box::new(DataTypeNode::Nullable(Box::new(
1388 DataTypeNode::String
1389 )))),
1390 DataTypeNode::Map([
1391 Box::new(DataTypeNode::Int32),
1392 Box::new(DataTypeNode::Tuple(vec![
1393 DataTypeNode::String,
1394 DataTypeNode::Array(Box::new(DataTypeNode::UInt8))
1395 ]))
1396 ])
1397 ])
1398 );
1399 assert_eq!(
1400 DataTypeNode::new(&format!("Tuple(String, {ENUM_WITH_ESCAPING_STR})")).unwrap(),
1401 DataTypeNode::Tuple(vec![DataTypeNode::String, enum_with_escaping()])
1402 );
1403 assert!(DataTypeNode::new("Tuple").is_err());
1404 assert!(DataTypeNode::new("Tuple(").is_err());
1405 assert!(DataTypeNode::new("Tuple()").is_err());
1406 assert!(DataTypeNode::new("Tuple(,)").is_err());
1407 assert!(DataTypeNode::new("Tuple(X)").is_err());
1408 assert!(DataTypeNode::new("Tuple(Int32, X)").is_err());
1409 assert!(DataTypeNode::new("Tuple(Int32, String, X)").is_err());
1410 }
1411
1412 #[test]
1413 fn test_data_type_new_enum() {
1414 assert_eq!(
1415 DataTypeNode::new("Enum8('A' = -42)").unwrap(),
1416 DataTypeNode::Enum(EnumType::Enum8, HashMap::from([(-42, "A".to_string())]))
1417 );
1418 assert_eq!(
1419 DataTypeNode::new("Enum16('A' = -144)").unwrap(),
1420 DataTypeNode::Enum(EnumType::Enum16, HashMap::from([(-144, "A".to_string())]))
1421 );
1422 assert_eq!(
1423 DataTypeNode::new("Enum8('A' = 1, 'B' = 2)").unwrap(),
1424 DataTypeNode::Enum(
1425 EnumType::Enum8,
1426 HashMap::from([(1, "A".to_string()), (2, "B".to_string())])
1427 )
1428 );
1429 assert_eq!(
1430 DataTypeNode::new("Enum16('A' = 1, 'B' = 2)").unwrap(),
1431 DataTypeNode::Enum(
1432 EnumType::Enum16,
1433 HashMap::from([(1, "A".to_string()), (2, "B".to_string())])
1434 )
1435 );
1436 assert_eq!(
1437 DataTypeNode::new(ENUM_WITH_ESCAPING_STR).unwrap(),
1438 enum_with_escaping()
1439 );
1440 assert_eq!(
1441 DataTypeNode::new("Enum8('foo' = 0, '' = 42)").unwrap(),
1442 DataTypeNode::Enum(
1443 EnumType::Enum8,
1444 HashMap::from([(0, "foo".to_string()), (42, "".to_string())])
1445 )
1446 );
1447
1448 assert!(DataTypeNode::new("Enum()").is_err());
1449 assert!(DataTypeNode::new("Enum8()").is_err());
1450 assert!(DataTypeNode::new("Enum16()").is_err());
1451 assert!(DataTypeNode::new("Enum32('A' = 1, 'B' = 2)").is_err());
1452 assert!(DataTypeNode::new("Enum32('A','B')").is_err());
1453 assert!(DataTypeNode::new("Enum32('A' = 1, 'B')").is_err());
1454 assert!(DataTypeNode::new("Enum32('A' = 1, 'B' =)").is_err());
1455 assert!(DataTypeNode::new("Enum32('A' = 1, 'B' = )").is_err());
1456 assert!(DataTypeNode::new("Enum32('A'= 1,'B' =)").is_err());
1457 }
1458
1459 #[test]
1460 fn test_data_type_new_geo() {
1461 assert_eq!(DataTypeNode::new("Point").unwrap(), DataTypeNode::Point);
1462 assert_eq!(DataTypeNode::new("Ring").unwrap(), DataTypeNode::Ring);
1463 assert_eq!(
1464 DataTypeNode::new("LineString").unwrap(),
1465 DataTypeNode::LineString
1466 );
1467 assert_eq!(DataTypeNode::new("Polygon").unwrap(), DataTypeNode::Polygon);
1468 assert_eq!(
1469 DataTypeNode::new("MultiLineString").unwrap(),
1470 DataTypeNode::MultiLineString
1471 );
1472 assert_eq!(
1473 DataTypeNode::new("MultiPolygon").unwrap(),
1474 DataTypeNode::MultiPolygon
1475 );
1476 }
1477
1478 #[test]
1479 fn test_data_type_to_string_simple() {
1480 assert_eq!(DataTypeNode::UInt8.to_string(), "UInt8");
1482 assert_eq!(DataTypeNode::UInt16.to_string(), "UInt16");
1483 assert_eq!(DataTypeNode::UInt32.to_string(), "UInt32");
1484 assert_eq!(DataTypeNode::UInt64.to_string(), "UInt64");
1485 assert_eq!(DataTypeNode::UInt128.to_string(), "UInt128");
1486 assert_eq!(DataTypeNode::UInt256.to_string(), "UInt256");
1487 assert_eq!(DataTypeNode::Int8.to_string(), "Int8");
1488 assert_eq!(DataTypeNode::Int16.to_string(), "Int16");
1489 assert_eq!(DataTypeNode::Int32.to_string(), "Int32");
1490 assert_eq!(DataTypeNode::Int64.to_string(), "Int64");
1491 assert_eq!(DataTypeNode::Int128.to_string(), "Int128");
1492 assert_eq!(DataTypeNode::Int256.to_string(), "Int256");
1493 assert_eq!(DataTypeNode::Float32.to_string(), "Float32");
1494 assert_eq!(DataTypeNode::Float64.to_string(), "Float64");
1495 assert_eq!(DataTypeNode::BFloat16.to_string(), "BFloat16");
1496 assert_eq!(DataTypeNode::UUID.to_string(), "UUID");
1497 assert_eq!(DataTypeNode::Date.to_string(), "Date");
1498 assert_eq!(DataTypeNode::Date32.to_string(), "Date32");
1499 assert_eq!(DataTypeNode::IPv4.to_string(), "IPv4");
1500 assert_eq!(DataTypeNode::IPv6.to_string(), "IPv6");
1501 assert_eq!(DataTypeNode::Bool.to_string(), "Bool");
1502 assert_eq!(DataTypeNode::Dynamic.to_string(), "Dynamic");
1503 assert_eq!(DataTypeNode::JSON.to_string(), "JSON");
1504 assert_eq!(DataTypeNode::String.to_string(), "String");
1505 }
1506
1507 #[test]
1508 fn test_data_types_to_string_complex() {
1509 assert_eq!(DataTypeNode::DateTime(None).to_string(), "DateTime");
1510 assert_eq!(
1511 DataTypeNode::DateTime(Some("UTC".to_string())).to_string(),
1512 "DateTime('UTC')"
1513 );
1514 assert_eq!(
1515 DataTypeNode::DateTime(Some("America/New_York".to_string())).to_string(),
1516 "DateTime('America/New_York')"
1517 );
1518
1519 assert_eq!(
1520 DataTypeNode::Nullable(Box::new(DataTypeNode::UInt64)).to_string(),
1521 "Nullable(UInt64)"
1522 );
1523 assert_eq!(
1524 DataTypeNode::LowCardinality(Box::new(DataTypeNode::String)).to_string(),
1525 "LowCardinality(String)"
1526 );
1527 assert_eq!(
1528 DataTypeNode::Array(Box::new(DataTypeNode::String)).to_string(),
1529 "Array(String)"
1530 );
1531 assert_eq!(
1532 DataTypeNode::Array(Box::new(DataTypeNode::Nullable(Box::new(
1533 DataTypeNode::String
1534 ))))
1535 .to_string(),
1536 "Array(Nullable(String))"
1537 );
1538 assert_eq!(
1539 DataTypeNode::Tuple(vec![
1540 DataTypeNode::String,
1541 DataTypeNode::UInt32,
1542 DataTypeNode::Float64
1543 ])
1544 .to_string(),
1545 "Tuple(String, UInt32, Float64)"
1546 );
1547 assert_eq!(
1548 DataTypeNode::Map([
1549 Box::new(DataTypeNode::String),
1550 Box::new(DataTypeNode::UInt32)
1551 ])
1552 .to_string(),
1553 "Map(String, UInt32)"
1554 );
1555 assert_eq!(
1556 DataTypeNode::Decimal(10, 2, DecimalType::Decimal32).to_string(),
1557 "Decimal(10, 2)"
1558 );
1559 assert_eq!(
1560 DataTypeNode::Enum(
1561 EnumType::Enum8,
1562 HashMap::from([(1, "A".to_string()), (2, "B".to_string())]),
1563 )
1564 .to_string(),
1565 "Enum8('A' = 1, 'B' = 2)"
1566 );
1567 assert_eq!(
1568 DataTypeNode::Enum(
1569 EnumType::Enum16,
1570 HashMap::from([(42, "foo".to_string()), (144, "bar".to_string())]),
1571 )
1572 .to_string(),
1573 "Enum16('foo' = 42, 'bar' = 144)"
1574 );
1575 assert_eq!(enum_with_escaping().to_string(), ENUM_WITH_ESCAPING_STR);
1576 assert_eq!(
1577 DataTypeNode::AggregateFunction("sum".to_string(), vec![DataTypeNode::UInt64])
1578 .to_string(),
1579 "AggregateFunction(sum, UInt64)"
1580 );
1581 assert_eq!(DataTypeNode::FixedString(16).to_string(), "FixedString(16)");
1582 assert_eq!(
1583 DataTypeNode::Variant(vec![DataTypeNode::UInt8, DataTypeNode::Bool]).to_string(),
1584 "Variant(UInt8, Bool)"
1585 );
1586 }
1587
1588 #[test]
1589 fn test_datetime64_to_string() {
1590 let test_cases = [
1591 (
1592 DataTypeNode::DateTime64(DateTimePrecision::Precision0, None),
1593 "DateTime64(0)",
1594 ),
1595 (
1596 DataTypeNode::DateTime64(DateTimePrecision::Precision1, None),
1597 "DateTime64(1)",
1598 ),
1599 (
1600 DataTypeNode::DateTime64(DateTimePrecision::Precision2, None),
1601 "DateTime64(2)",
1602 ),
1603 (
1604 DataTypeNode::DateTime64(DateTimePrecision::Precision3, None),
1605 "DateTime64(3)",
1606 ),
1607 (
1608 DataTypeNode::DateTime64(DateTimePrecision::Precision4, None),
1609 "DateTime64(4)",
1610 ),
1611 (
1612 DataTypeNode::DateTime64(DateTimePrecision::Precision5, None),
1613 "DateTime64(5)",
1614 ),
1615 (
1616 DataTypeNode::DateTime64(DateTimePrecision::Precision6, None),
1617 "DateTime64(6)",
1618 ),
1619 (
1620 DataTypeNode::DateTime64(DateTimePrecision::Precision7, None),
1621 "DateTime64(7)",
1622 ),
1623 (
1624 DataTypeNode::DateTime64(DateTimePrecision::Precision8, None),
1625 "DateTime64(8)",
1626 ),
1627 (
1628 DataTypeNode::DateTime64(DateTimePrecision::Precision9, None),
1629 "DateTime64(9)",
1630 ),
1631 (
1632 DataTypeNode::DateTime64(DateTimePrecision::Precision0, Some("UTC".to_string())),
1633 "DateTime64(0, 'UTC')",
1634 ),
1635 (
1636 DataTypeNode::DateTime64(
1637 DateTimePrecision::Precision3,
1638 Some("America/New_York".to_string()),
1639 ),
1640 "DateTime64(3, 'America/New_York')",
1641 ),
1642 (
1643 DataTypeNode::DateTime64(
1644 DateTimePrecision::Precision6,
1645 Some("Europe/Amsterdam".to_string()),
1646 ),
1647 "DateTime64(6, 'Europe/Amsterdam')",
1648 ),
1649 (
1650 DataTypeNode::DateTime64(
1651 DateTimePrecision::Precision9,
1652 Some("Asia/Tokyo".to_string()),
1653 ),
1654 "DateTime64(9, 'Asia/Tokyo')",
1655 ),
1656 ];
1657 for (data_type, expected_str) in test_cases.iter() {
1658 assert_eq!(
1659 &data_type.to_string(),
1660 expected_str,
1661 "Expected data type {data_type} to be formatted as {expected_str}"
1662 );
1663 }
1664 }
1665
1666 #[test]
1667 fn test_interval_to_string() {
1668 assert_eq!(
1669 DataTypeNode::Interval(IntervalType::Nanosecond).to_string(),
1670 "IntervalNanosecond"
1671 );
1672 assert_eq!(
1673 DataTypeNode::Interval(IntervalType::Microsecond).to_string(),
1674 "IntervalMicrosecond"
1675 );
1676 assert_eq!(
1677 DataTypeNode::Interval(IntervalType::Millisecond).to_string(),
1678 "IntervalMillisecond"
1679 );
1680 assert_eq!(
1681 DataTypeNode::Interval(IntervalType::Second).to_string(),
1682 "IntervalSecond"
1683 );
1684 assert_eq!(
1685 DataTypeNode::Interval(IntervalType::Minute).to_string(),
1686 "IntervalMinute"
1687 );
1688 assert_eq!(
1689 DataTypeNode::Interval(IntervalType::Hour).to_string(),
1690 "IntervalHour"
1691 );
1692 assert_eq!(
1693 DataTypeNode::Interval(IntervalType::Day).to_string(),
1694 "IntervalDay"
1695 );
1696 assert_eq!(
1697 DataTypeNode::Interval(IntervalType::Week).to_string(),
1698 "IntervalWeek"
1699 );
1700 assert_eq!(
1701 DataTypeNode::Interval(IntervalType::Month).to_string(),
1702 "IntervalMonth"
1703 );
1704 assert_eq!(
1705 DataTypeNode::Interval(IntervalType::Quarter).to_string(),
1706 "IntervalQuarter"
1707 );
1708 assert_eq!(
1709 DataTypeNode::Interval(IntervalType::Year).to_string(),
1710 "IntervalYear"
1711 );
1712 }
1713
1714 #[test]
1715 fn test_data_type_node_into_string() {
1716 let data_type = DataTypeNode::new("Array(Int32)").unwrap();
1717 let data_type_string: String = data_type.into();
1718 assert_eq!(data_type_string, "Array(Int32)");
1719 }
1720
1721 #[test]
1722 fn test_data_type_to_string_geo() {
1723 assert_eq!(DataTypeNode::Point.to_string(), "Point");
1724 assert_eq!(DataTypeNode::Ring.to_string(), "Ring");
1725 assert_eq!(DataTypeNode::LineString.to_string(), "LineString");
1726 assert_eq!(DataTypeNode::Polygon.to_string(), "Polygon");
1727 assert_eq!(DataTypeNode::MultiLineString.to_string(), "MultiLineString");
1728 assert_eq!(DataTypeNode::MultiPolygon.to_string(), "MultiPolygon");
1729 }
1730
1731 #[test]
1732 fn test_display_column() {
1733 let column = Column::new(
1734 "col".to_string(),
1735 DataTypeNode::new("Array(Int32)").unwrap(),
1736 );
1737 assert_eq!(column.to_string(), "col: Array(Int32)");
1738 }
1739
1740 #[test]
1741 fn test_display_decimal_size() {
1742 assert_eq!(DecimalType::Decimal32.to_string(), "Decimal32");
1743 assert_eq!(DecimalType::Decimal64.to_string(), "Decimal64");
1744 assert_eq!(DecimalType::Decimal128.to_string(), "Decimal128");
1745 assert_eq!(DecimalType::Decimal256.to_string(), "Decimal256");
1746 }
1747
1748 #[test]
1749 fn test_time_time64_roundtrip_and_edges() {
1750 use super::DateTimePrecision::*;
1751
1752 assert_eq!(DataTypeNode::new("Time").unwrap(), DataTypeNode::Time);
1754
1755 assert_eq!(
1757 DataTypeNode::new("Time('UTC')").unwrap(),
1758 DataTypeNode::Time
1759 );
1760 assert_eq!(
1761 DataTypeNode::new("Time('Europe/Moscow')").unwrap(),
1762 DataTypeNode::Time
1763 );
1764
1765 assert_eq!(
1767 DataTypeNode::new("Time64(0)").unwrap(),
1768 DataTypeNode::Time64(Precision0)
1769 );
1770
1771 assert_eq!(
1773 DataTypeNode::new("Time64(9, 'Europe/Amsterdam')").unwrap(),
1774 DataTypeNode::Time64(Precision9)
1775 );
1776
1777 assert_eq!(
1779 DataTypeNode::new("Time64(0, 'UTC')").unwrap(),
1780 DataTypeNode::Time64(Precision0)
1781 );
1782
1783 assert_eq!(
1785 DataTypeNode::new("Time64(3)").unwrap(),
1786 DataTypeNode::Time64(Precision3)
1787 );
1788
1789 assert_eq!(
1791 DataTypeNode::new("Time64(6, 'America/New_York')").unwrap(),
1792 DataTypeNode::Time64(Precision6)
1793 );
1794
1795 assert!(DataTypeNode::new("Time64()").is_err());
1797
1798 assert!(DataTypeNode::new("Time64(x)").is_err());
1800 }
1801
1802 const ENUM_WITH_ESCAPING_STR: &str =
1803 "Enum8('f\\'' = 1, 'x =' = 2, 'b\\'\\'' = 3, '\\'c=4=' = 42, '4' = 100)";
1804
1805 fn enum_with_escaping() -> DataTypeNode {
1806 DataTypeNode::Enum(
1807 EnumType::Enum8,
1808 HashMap::from([
1809 (1, "f\\'".to_string()),
1810 (2, "x =".to_string()),
1811 (3, "b\\'\\'".to_string()),
1812 (42, "\\'c=4=".to_string()),
1813 (100, "4".to_string()),
1814 ]),
1815 )
1816 }
1817}