1use std::str::FromStr;
2use std::sync::Arc;
3
4use arrow::datatypes::*;
5
6use crate::geo::normalize_geo_type;
7use crate::{ArrowOptions, Error, Result, Type};
8
9pub type SchemaConversions = std::collections::HashMap<String, Type>;
11
/// Name of the element field inside Arrow list types built from ClickHouse arrays.
pub const LIST_ITEM_FIELD_NAME: &str = "item";
/// Prefix of generated Arrow struct field names for tuple elements; the element index follows.
pub const TUPLE_FIELD_NAME_PREFIX: &str = "field_";
/// Name of the entries field that wraps the key/value struct of an Arrow map.
pub const MAP_FIELD_NAME: &str = "entries";
/// Name of the key field inside a map's entries struct.
pub const STRUCT_KEY_FIELD_NAME: &str = "key";
/// Name of the value field inside a map's entries struct.
pub const STRUCT_VALUE_FIELD_NAME: &str = "value";
22
23impl From<i256> for crate::i256 {
25 fn from(arrow_i256: i256) -> Self {
26 let bytes = arrow_i256.to_be_bytes();
28 crate::i256(bytes)
29 }
30}
31
32impl From<crate::i256> for i256 {
33 fn from(value: crate::i256) -> Self {
34 i256::from_be_bytes(value.0)
36 }
37}
38
/// Replaces a string-like ClickHouse type with an enum type built by `$enum_typ($values)`.
///
/// `$low_card` must be `String`, `Binary`, or a `LowCardinality` wrapping one of those
/// (optionally nullable). The produced enum is nullable when the string-like type is: for
/// `LowCardinality` the nullability of the wrapped type is used, otherwise that of `$low_card`.
///
/// Any other type makes the *enclosing function* return `Error::TypeConversion`, so the macro
/// can only be used inside functions returning the crate's `Result`.
macro_rules! convert_to_enum {
    ($enum_typ:expr, $low_card:expr, $values:expr) => {{
        let source = &$low_card;
        // First decide nullability (or bail out); the enum itself is built once below.
        let nullable = match source.strip_null() {
            $crate::Type::LowCardinality(inner) => {
                if !matches!(inner.strip_null(), $crate::Type::String | $crate::Type::Binary) {
                    return Err($crate::Error::TypeConversion(format!(
                        "expected LowCardinality(String), found {}",
                        source
                    )));
                }
                inner.is_nullable()
            }
            $crate::Type::String | $crate::Type::Binary => source.is_nullable(),
            _ => {
                return Err($crate::Error::TypeConversion(format!(
                    "expected LowCardinality(String) or String/Binary, found {}",
                    source
                )));
            }
        };

        let enum_type = $enum_typ($values);
        if nullable { enum_type.into_nullable() } else { enum_type }
    }};
}
69
70fn generate_schema_options(options: Option<ArrowOptions>) -> (ArrowOptions, ArrowOptions) {
72 let strict_options = options.map_or(ArrowOptions::strict(), ArrowOptions::into_strict_ddl);
74 let conversion_options =
77 options.unwrap_or(ArrowOptions::default().with_nullable_array_default_empty(false));
78 (strict_options, conversion_options)
79}
80
81pub(crate) fn schema_conversion(
82 field: &Field,
83 conversions: Option<&SchemaConversions>,
84 options: Option<ArrowOptions>,
85) -> Result<Type> {
86 let name = field.name();
87 let data_type = field.data_type();
88 let field_nullable = field.is_nullable();
89
90 let (strict_opts, conversion_opts) = generate_schema_options(options);
91 Ok(match conversions.and_then(|c| c.get(name)).map(Type::strip_null) {
93 Some(Type::Enum8(values)) => {
94 let type_ = arrow_to_ch_type(data_type, field_nullable, Some(conversion_opts))?;
95 convert_to_enum!(Type::Enum8, type_, values.clone())
96 }
97 Some(Type::Enum16(values)) => {
98 let type_ = arrow_to_ch_type(data_type, field_nullable, Some(conversion_opts))?;
99 convert_to_enum!(Type::Enum16, type_, values.clone())
100 }
101 Some(conv @ (Type::Date | Type::Date32)) => {
102 let type_ = arrow_to_ch_type(data_type, field_nullable, Some(conversion_opts))?;
103 if !matches!(type_, Type::Date | Type::Date32) {
104 return Err(Error::TypeConversion(format!(
105 "expected Date or Date32, found {type_}",
106 )));
107 }
108 conv.clone()
109 }
110 Some(conv @ (Type::Ring | Type::Point | Type::Polygon | Type::MultiPolygon)) => {
112 conv.clone()
113 }
114 _ => arrow_to_ch_type(data_type, field_nullable, Some(strict_opts))?,
115 })
116}
117
118#[expect(clippy::cast_sign_loss)]
137pub(crate) fn normalize_type(type_: &Type, arrow_type: &DataType) -> Option<Type> {
138 let nullable = type_.is_nullable();
139 let type_ = match (type_.strip_null(), arrow_type) {
140 (Type::String, DataType::Binary | DataType::BinaryView | DataType::LargeBinary) => {
141 Some(Type::Binary)
142 }
143 (Type::String | Type::FixedSizedString(_) | Type::Binary, DataType::FixedSizeBinary(n)) => {
144 Some(Type::FixedSizedBinary(*n as usize))
145 }
146 (Type::Binary, DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View) => {
147 Some(Type::String)
148 }
149 (Type::FixedSizedBinary(n), DataType::Utf8 | DataType::Utf8View) => {
150 Some(Type::FixedSizedString(*n))
151 }
152 (
153 Type::Array(inner),
154 DataType::List(inner_field)
155 | DataType::ListView(inner_field)
156 | DataType::LargeList(inner_field)
157 | DataType::LargeListView(inner_field),
158 ) => normalize_type(inner, inner_field.data_type()).map(Box::new).map(Type::Array),
159 (Type::LowCardinality(inner), DataType::Dictionary(_, value_type)) => {
160 normalize_type(inner, value_type).map(Box::new).map(Type::LowCardinality)
161 }
162 (
163 Type::LowCardinality(inner),
164 t @ (DataType::Utf8
165 | DataType::Utf8View
166 | DataType::LargeUtf8
167 | DataType::Binary
168 | DataType::LargeBinary
169 | DataType::BinaryView
170 | DataType::FixedSizeBinary(_)),
171 ) => normalize_type(inner, t).map(Box::new).map(Type::LowCardinality),
172 (Type::Tuple(inner), DataType::Struct(inner_fields)) => {
173 let mut deferred_vec: Option<Vec<Type>> = None;
174
175 for (i, (inner_type, field)) in inner.iter().zip(inner_fields.iter()).enumerate() {
176 if let Some(normalized_type) = normalize_type(inner_type, field.data_type()) {
177 if deferred_vec.is_none() {
179 let mut vec = Vec::with_capacity(inner.len());
180 vec.extend(inner[..i].iter().cloned());
181 deferred_vec = Some(vec);
182 }
183
184 deferred_vec.as_mut().unwrap().push(normalized_type);
186 } else if let Some(vec) = &mut deferred_vec {
187 vec.push(inner_type.clone());
189 }
190 }
191
192 deferred_vec.map(Type::Tuple)
193 }
194 _ => return None,
195 };
196
197 if nullable { type_.map(Type::into_nullable) } else { type_ }
198}
199
200#[expect(clippy::cast_sign_loss)]
204#[expect(clippy::too_many_lines)]
205pub(crate) fn arrow_to_ch_type(
206 data_type: &DataType,
207 mut is_nullable: bool,
208 options: Option<ArrowOptions>,
209) -> Result<Type> {
210 let tz_map = |tz: Option<&str>| {
211 tz.and_then(|s| chrono_tz::Tz::from_str(s).ok()).unwrap_or(chrono_tz::Tz::UTC)
212 };
213
214 let inner_type = match data_type {
216 DataType::Int8 => Type::Int8,
217 DataType::Int16 => Type::Int16,
218 DataType::Int32 => Type::Int32,
219 DataType::Int64 | DataType::Interval(_) => Type::Int64,
220 DataType::UInt8 | DataType::Boolean => Type::UInt8,
221 DataType::UInt16 => Type::UInt16,
222 DataType::UInt32 => Type::UInt32,
223 DataType::UInt64 => Type::UInt64,
224 DataType::Float32 => Type::Float32,
225 DataType::Float64 => Type::Float64,
226 DataType::Decimal32(_, s) => Type::Decimal32(*s as usize),
227 DataType::Decimal64(p, s) => match *p {
228 p if p <= 9 => Type::Decimal32(*s as usize),
229 _ => Type::Decimal64(*s as usize),
230 },
231 DataType::Decimal128(p, s) => match *p {
232 p if p <= 9 => Type::Decimal32(*s as usize),
233 p if p <= 18 => Type::Decimal64(*s as usize),
234 p if p <= 38 => Type::Decimal128(*s as usize),
235 _ => Type::Decimal256(*s as usize), },
237 DataType::Decimal256(_, s) => Type::Decimal256(*s as usize),
238 DataType::Date32 if options.is_some_and(|o| o.use_date32_for_date) => Type::Date32 ,
240 DataType::Date32 => Type::Date,
241 DataType::Time32(TimeUnit::Second)
242 | DataType::Time64(TimeUnit::Second)
243 | DataType::Duration(TimeUnit::Second) => Type::DateTime(chrono_tz::Tz::UTC),
244 DataType::Date64
245 | DataType::Duration(TimeUnit::Millisecond)
246 | DataType::Time32(TimeUnit::Millisecond)
247 | DataType::Time64(TimeUnit::Millisecond) => Type::DateTime64(3, chrono_tz::Tz::UTC),
248 DataType::Time64(TimeUnit::Microsecond) | DataType::Duration(TimeUnit::Microsecond) => {
249 Type::DateTime64(6, chrono_tz::Tz::UTC)
250 }
251 DataType::Time64(TimeUnit::Nanosecond) | DataType::Duration(TimeUnit::Nanosecond) => {
252 Type::DateTime64(9, chrono_tz::Tz::UTC)
253 }
254 DataType::Timestamp(TimeUnit::Second, tz) => Type::DateTime(tz_map(Some(tz.as_deref().unwrap_or("UTC")))),
255 DataType::Timestamp(TimeUnit::Millisecond, tz) => {
256 Type::DateTime64(3, tz_map(Some(tz.as_deref().unwrap_or("UTC"))))
257 }
258 DataType::Timestamp(TimeUnit::Microsecond, tz) => {
259 Type::DateTime64(6, tz_map(Some(tz.as_deref().unwrap_or("UTC"))))
260 }
261 DataType::Timestamp(TimeUnit::Nanosecond, tz) => Type::DateTime64(9, tz_map(Some(tz.as_deref().unwrap_or("UTC")))),
262 DataType::Time32(TimeUnit::Nanosecond) => Type::DateTime64(9, chrono_tz::Tz::UTC),
263 DataType::FixedSizeBinary(s) => Type::FixedSizedBinary(*s as usize),
264 DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => Type::String,
265 DataType::List(f)
266 | DataType::LargeList(f)
267 | DataType::ListView(f)
268 | DataType::LargeListView(f)
269 | DataType::FixedSizeList(f, _) => {
270 if is_nullable && options.is_some_and(|o|
272 o.strict_schema && !o.nullable_array_default_empty
273 ) {
274 return Err(Error::TypeConversion(
275 "ClickHouse does not support nullable Lists".to_string(),
276 ));
277 }
278
279 Type::Array(Box::new(
280 arrow_to_ch_type(f.data_type(), f.is_nullable(), options)?
281 ))
282 }
283 DataType::Dictionary(_, value_type) => {
284 if is_nullable && options.is_some_and(|o| o.strict_schema) {
285 return Err(Error::TypeConversion(
286 "ClickHouse does not support nullable Dictionary".to_string(),
287 ));
288 }
289 let nullable = is_nullable;
292 is_nullable = false;
293 Type::LowCardinality(Box::new(arrow_to_ch_type(value_type, nullable, options)?))
294 }
295 DataType::Struct(fields) => {
296 let ch_types = fields
297 .iter()
298 .map(|f| arrow_to_ch_type(f.data_type(), f.is_nullable(), options))
299 .collect::<Result<_>>()?;
300 Type::Tuple(ch_types)
301 }
302 DataType::Map(key, _) => {
303 let DataType::Struct(inner) = key.data_type() else {
304 return Err(Error::ArrowDeserialize(format!(
305 "Unexpected key type for map: {key:?}"
306 )));
307 };
308
309 let (key_field, value_field) = if inner.len() >= 2 {
310 (&inner[0], &inner[1])
311 } else {
312 return Err(Error::ArrowDeserialize(
313 "Map inner fields malformed".into(),
314 ));
315 };
316
317 let key_type =
318 arrow_to_ch_type(key_field.data_type(), key_field.is_nullable(), options)?;
319 let value_type =
320 arrow_to_ch_type(value_field.data_type(), value_field.is_nullable(), options)?;
321
322 Type::Map(Box::new(key_type), Box::new(value_type))
323 }
324 DataType::Binary | DataType::LargeBinary | DataType::BinaryView => Type::Binary,
325 DataType::Time32(TimeUnit::Microsecond) => {
327 Type::DateTime64(6, chrono_tz::Tz::UTC)
329 }
330 DataType::Null
331 | DataType::Float16
332 | DataType::Union(_, _)
333 | DataType::RunEndEncoded(_, _) => {
335 return Err(Error::ArrowUnsupportedType(format!(
336 "Arrow data type is not supported: {data_type:?}"
337 )));
338 }
339 };
340
341 Ok(if is_nullable && !matches!(inner_type, Type::Array(_) | Type::Map(_, _)) {
343 Type::Nullable(Box::new(inner_type))
344 } else {
345 inner_type
346 })
347}
348
349#[expect(clippy::too_many_lines)]
364#[expect(clippy::cast_possible_truncation)]
365#[expect(clippy::cast_possible_wrap)]
366pub fn ch_to_arrow_type(ch_type: &Type, options: Option<ArrowOptions>) -> Result<(DataType, bool)> {
367 let mut is_null = ch_type.is_nullable();
368 let inner_type = ch_type.strip_null();
369
370 let arrow_type = match inner_type {
372 Type::Int8 => DataType::Int8,
374 Type::Int16 => DataType::Int16,
375 Type::Int32 => DataType::Int32,
376 Type::Int64 => DataType::Int64,
377 Type::UInt8 => DataType::UInt8,
378 Type::UInt16 => DataType::UInt16,
379 Type::UInt32 => DataType::UInt32,
380 Type::UInt64 => DataType::UInt64,
381 Type::Int128 | Type::UInt128 | Type::Ipv6 | Type::Uuid => DataType::FixedSizeBinary(16),
382 Type::Int256 | Type::UInt256 => DataType::FixedSizeBinary(32),
383 Type::Float32 => DataType::Float32,
384 Type::Float64 => DataType::Float64,
385 Type::Decimal32(s) => DataType::Decimal128(9, *s as i8),
386 Type::Decimal64(s) => DataType::Decimal128(18, *s as i8),
387 Type::Decimal128(s) => DataType::Decimal128(38, *s as i8),
388 Type::Decimal256(s) => DataType::Decimal256(76, *s as i8),
389 Type::String => {
390 if options.is_some_and(|o| o.strings_as_strings) {
391 DataType::Utf8
392 } else {
393 DataType::Binary
394 }
395 }
396 Type::FixedSizedString(len) | Type::FixedSizedBinary(len) => {
397 DataType::FixedSizeBinary(*len as i32)
398 }
399 Type::Binary => DataType::Binary,
400 Type::Object => DataType::Utf8,
401 Type::Date32 | Type::Date => DataType::Date32,
402 Type::DateTime(tz) => DataType::Timestamp(TimeUnit::Second, Some(Arc::from(tz.name()))),
403 Type::DateTime64(p, tz) => match p {
404 0 => DataType::Timestamp(TimeUnit::Second, Some(Arc::from(tz.name()))),
405 1..=3 => DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from(tz.name()))),
406 4..=6 => DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from(tz.name()))),
407 7..=9 => DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from(tz.name()))),
408 _ => {
409 return Err(Error::ArrowUnsupportedType(format!(
410 "DateTime64 precision must be 0-9, received {p}"
411 )));
412 }
413 },
414 Type::Ipv4 => DataType::FixedSizeBinary(4),
415 Type::Array(inner_type) => {
416 if is_null
417 && options.is_some_and(|o| o.strict_schema && !o.nullable_array_default_empty)
418 {
419 return Err(Error::TypeConversion(
420 "ClickHouse does not support nullable Arrays".to_string(),
421 ));
422 }
423 let (inner_arrow_type, is_null) = ch_to_arrow_type(inner_type, options)?;
424 DataType::List(Arc::new(Field::new(LIST_ITEM_FIELD_NAME, inner_arrow_type, is_null)))
425 }
426 Type::Tuple(types) => {
427 let fields: Vec<Field> = types
428 .iter()
429 .enumerate()
430 .map(|(i, t)| {
431 ch_to_arrow_type(t, options).map(|(arrow_type, is_null)| {
432 Field::new(format!("{TUPLE_FIELD_NAME_PREFIX}{i}"), arrow_type, is_null)
433 })
434 })
435 .collect::<Result<Vec<_>>>()?;
436 DataType::Struct(fields.into())
437 }
438 Type::Map(key_type, value_type) => {
439 let (key_arrow_type, _) = ch_to_arrow_type(key_type, options)?;
440 let (value_arrow_type, is_null) = ch_to_arrow_type(value_type, options)?;
441 DataType::Map(
442 Arc::new(Field::new(
443 MAP_FIELD_NAME,
444 DataType::Struct(
445 vec![
446 Field::new(STRUCT_KEY_FIELD_NAME, key_arrow_type, false),
447 Field::new(STRUCT_VALUE_FIELD_NAME, value_arrow_type, is_null),
448 ]
449 .into(),
450 ),
451 false,
452 )),
453 false,
454 )
455 }
456 Type::LowCardinality(inner_type) => {
457 if is_null && options.is_some_and(|o| o.strict_schema) {
458 return Err(Error::TypeConversion(
459 "ClickHouse does not support nullable LowCardinality".to_string(),
460 ));
461 }
462
463 is_null = inner_type.is_nullable();
465
466 DataType::Dictionary(
467 Box::new(DataType::Int32),
468 Box::new(ch_to_arrow_type(inner_type, options)?.0),
469 )
470 }
471 Type::Enum8(_) => DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
472 Type::Enum16(_) => {
473 DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8))
474 }
475 Type::Point | Type::Ring | Type::Polygon | Type::MultiPolygon => {
476 let normalized = normalize_geo_type(ch_type).unwrap();
478 return ch_to_arrow_type(&normalized, options);
479 }
480 Type::Nullable(_) => unreachable!(),
482 };
483
484 Ok((arrow_type, is_null))
485}
486
487#[cfg(test)]
488mod tests {
489 use std::collections::HashMap;
490 use std::sync::Arc;
491
492 use arrow::datatypes::{DataType, Field, TimeUnit};
493 use chrono_tz::Tz;
494
495 use super::*;
496
497 #[test]
498 fn test_i256_conversions() {
499 let arrow_i256 = i256::from_i128(123_456_789);
501 let ch_i256: crate::i256 = arrow_i256.into();
502 let back_to_arrow: i256 = ch_i256.into();
503 assert_eq!(arrow_i256, back_to_arrow);
504
505 let arrow_i256 = i256::from_i128(0);
507 let ch_i256: crate::i256 = arrow_i256.into();
508 let back_to_arrow: i256 = ch_i256.into();
509 assert_eq!(arrow_i256, back_to_arrow);
510
511 let arrow_i256 = i256::from_i128(-987_654_321);
513 let ch_i256: crate::i256 = arrow_i256.into();
514 let back_to_arrow: i256 = ch_i256.into();
515 assert_eq!(arrow_i256, back_to_arrow);
516 }
517
518 #[test]
519 fn test_normalize_type() {
520 assert_eq!(normalize_type(&Type::String, &DataType::Binary), Some(Type::Binary));
522 assert_eq!(normalize_type(&Type::Binary, &DataType::Utf8), Some(Type::String));
523 assert_eq!(
524 normalize_type(&Type::FixedSizedBinary(4), &DataType::Utf8),
525 Some(Type::FixedSizedString(4))
526 );
527 assert_eq!(
528 normalize_type(&Type::String, &DataType::FixedSizeBinary(8)),
529 Some(Type::FixedSizedBinary(8))
530 );
531
532 let arrow_list =
534 DataType::List(Arc::new(Field::new(LIST_ITEM_FIELD_NAME, DataType::Binary, false)));
535 assert_eq!(
536 normalize_type(&Type::Array(Box::new(Type::String)), &arrow_list),
537 Some(Type::Array(Box::new(Type::Binary)))
538 );
539
540 let arrow_dict = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
542 assert_eq!(
543 normalize_type(&Type::LowCardinality(Box::new(Type::Binary)), &arrow_dict),
544 Some(Type::LowCardinality(Box::new(Type::String)))
545 );
546
547 let arrow_dict = DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
548 assert_eq!(
549 normalize_type(
550 &(Type::LowCardinality(Box::new(Type::Binary)).into_nullable()),
551 &arrow_dict
552 ),
553 Some(Type::LowCardinality(Box::new(Type::String)).into_nullable())
554 );
555
556 assert_eq!(
558 normalize_type(&Type::Nullable(Box::new(Type::String)), &DataType::Binary),
559 Some(Type::Nullable(Box::new(Type::Binary)))
560 );
561
562 assert_eq!(normalize_type(&Type::Int32, &DataType::Int32), None);
564
565 assert_eq!(normalize_type(&Type::Int32, &DataType::Float64), None);
567 }
568
569 #[test]
570 #[expect(clippy::too_many_lines)]
571 fn test_arrow_to_ch_type() {
572 assert_eq!(arrow_to_ch_type(&DataType::Int8, false, None).unwrap(), Type::Int8);
574 assert_eq!(arrow_to_ch_type(&DataType::UInt8, false, None).unwrap(), Type::UInt8);
575 assert_eq!(arrow_to_ch_type(&DataType::Float64, false, None).unwrap(), Type::Float64);
576
577 assert_eq!(
579 arrow_to_ch_type(&DataType::Decimal128(9, 2), false, None).unwrap(),
580 Type::Decimal32(2)
581 );
582 assert_eq!(
583 arrow_to_ch_type(&DataType::Decimal128(18, 4), false, None).unwrap(),
584 Type::Decimal64(4)
585 );
586 assert_eq!(
587 arrow_to_ch_type(&DataType::Decimal256(76, 6), false, None).unwrap(),
588 Type::Decimal256(6)
589 );
590
591 assert_eq!(arrow_to_ch_type(&DataType::Date32, false, None).unwrap(), Type::Date);
593 let datetimes = [
594 arrow_to_ch_type(&DataType::Time32(TimeUnit::Second), false, None).unwrap(),
595 arrow_to_ch_type(&DataType::Time64(TimeUnit::Second), false, None).unwrap(),
596 arrow_to_ch_type(&DataType::Duration(TimeUnit::Second), false, None).unwrap(),
597 ];
598 for dt in datetimes {
599 assert_eq!(dt, Type::DateTime(Tz::UTC));
600 }
601
602 let datetimes = [
603 arrow_to_ch_type(&DataType::Date64, false, None).unwrap(),
604 arrow_to_ch_type(&DataType::Duration(TimeUnit::Millisecond), false, None).unwrap(),
605 arrow_to_ch_type(&DataType::Time32(TimeUnit::Millisecond), false, None).unwrap(),
606 arrow_to_ch_type(&DataType::Time64(TimeUnit::Millisecond), false, None).unwrap(),
607 ];
608 for dt in datetimes {
609 assert_eq!(dt, Type::DateTime64(3, Tz::UTC));
610 }
611 let datetimes = [
612 arrow_to_ch_type(&DataType::Duration(TimeUnit::Microsecond), false, None).unwrap(),
613 arrow_to_ch_type(&DataType::Time64(TimeUnit::Microsecond), false, None).unwrap(),
614 ];
615 for dt in datetimes {
616 assert_eq!(dt, Type::DateTime64(6, Tz::UTC));
617 }
618 let datetimes = [
619 arrow_to_ch_type(&DataType::Duration(TimeUnit::Nanosecond), false, None).unwrap(),
620 arrow_to_ch_type(&DataType::Time32(TimeUnit::Nanosecond), false, None).unwrap(),
621 arrow_to_ch_type(&DataType::Time64(TimeUnit::Nanosecond), false, None).unwrap(),
622 ];
623 for dt in datetimes {
624 assert_eq!(dt, Type::DateTime64(9, Tz::UTC));
625 }
626 assert_eq!(
627 arrow_to_ch_type(
628 &DataType::Timestamp(TimeUnit::Second, Some(Arc::from("America/New_York"))),
629 false,
630 None
631 )
632 .unwrap(),
633 Type::DateTime(Tz::America__New_York)
634 );
635 assert_eq!(
636 arrow_to_ch_type(
637 &DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from("America/New_York"))),
638 false,
639 None
640 )
641 .unwrap(),
642 Type::DateTime64(3, Tz::America__New_York)
643 );
644 assert_eq!(
645 arrow_to_ch_type(
646 &DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from("America/New_York"))),
647 false,
648 None
649 )
650 .unwrap(),
651 Type::DateTime64(6, Tz::America__New_York)
652 );
653 assert_eq!(
654 arrow_to_ch_type(
655 &DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("America/New_York"))),
656 false,
657 None
658 )
659 .unwrap(),
660 Type::DateTime64(9, Tz::America__New_York)
661 );
662
663 let strings_types = [DataType::Utf8, DataType::Utf8View, DataType::LargeUtf8];
665 for s in strings_types {
666 assert_eq!(arrow_to_ch_type(&s, false, None).unwrap(), Type::String);
667 }
668
669 let binary_types = [DataType::Binary, DataType::BinaryView, DataType::LargeBinary];
670 for s in binary_types {
671 assert_eq!(arrow_to_ch_type(&s, false, None).unwrap(), Type::Binary);
672 }
673 assert_eq!(
674 arrow_to_ch_type(&DataType::FixedSizeBinary(4), false, None).unwrap(),
675 Type::FixedSizedBinary(4)
676 );
677
678 let list_field = Arc::new(Field::new(LIST_ITEM_FIELD_NAME, DataType::Int32, false));
680 let list_types = [
681 DataType::List(Arc::clone(&list_field)),
682 DataType::ListView(Arc::clone(&list_field)),
683 DataType::LargeList(list_field),
684 ];
685 for l in list_types {
686 assert_eq!(
687 arrow_to_ch_type(&l, false, None).unwrap(),
688 Type::Array(Box::new(Type::Int32))
689 );
690 }
691
692 let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
694 assert_eq!(
695 arrow_to_ch_type(&dict_type, false, None).unwrap(),
696 Type::LowCardinality(Box::new(Type::String))
697 );
698
699 assert_eq!(
701 arrow_to_ch_type(&DataType::Int32, true, None).unwrap(),
702 Type::Nullable(Box::new(Type::Int32))
703 );
704
705 assert!(arrow_to_ch_type(&DataType::Null, false, None).is_err());
707 assert!(arrow_to_ch_type(&DataType::Float16, false, None).is_err());
708 assert!(
709 arrow_to_ch_type(
710 &DataType::RunEndEncoded(
711 Field::new("", DataType::Int32, false).into(),
712 Field::new("", DataType::Utf8, false).into()
713 ),
714 false,
715 None
716 )
717 .is_err()
718 );
719 }
720
721 #[test]
722 fn test_ch_to_arrow_type() {
723 let options = Some(ArrowOptions::default().with_strings_as_strings(true));
724
725 assert_eq!(ch_to_arrow_type(&Type::Int8, options).unwrap(), (DataType::Int8, false));
727 assert_eq!(ch_to_arrow_type(&Type::UInt8, options).unwrap(), (DataType::UInt8, false));
728 assert_eq!(ch_to_arrow_type(&Type::Float64, options).unwrap(), (DataType::Float64, false));
729
730 assert_eq!(
732 ch_to_arrow_type(&Type::Decimal32(2), options).unwrap(),
733 (DataType::Decimal128(9, 2), false)
734 );
735 assert_eq!(
736 ch_to_arrow_type(&Type::Decimal256(6), options).unwrap(),
737 (DataType::Decimal256(76, 6), false)
738 );
739
740 assert_eq!(
742 ch_to_arrow_type(&Type::DateTime(Tz::UTC), options).unwrap(),
743 (DataType::Timestamp(TimeUnit::Second, Some(Arc::from("UTC"))), false)
744 );
745 assert_eq!(
746 ch_to_arrow_type(&Type::DateTime64(6, Tz::America__New_York), options).unwrap(),
747 (
748 DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from("America/New_York"))),
749 false
750 )
751 );
752
753 assert_eq!(ch_to_arrow_type(&Type::String, options).unwrap(), (DataType::Utf8, false));
755 assert_eq!(
756 ch_to_arrow_type(&Type::FixedSizedString(4), options).unwrap(),
757 (DataType::FixedSizeBinary(4), false)
758 );
759 assert_eq!(
760 ch_to_arrow_type(&Type::FixedSizedBinary(4), options).unwrap(),
761 (DataType::FixedSizeBinary(4), false)
762 );
763
764 assert_eq!(ch_to_arrow_type(&Type::String, None).unwrap(), (DataType::Binary, false));
766 assert_eq!(
768 ch_to_arrow_type(&Type::FixedSizedString(4), None).unwrap(),
769 (DataType::FixedSizeBinary(4), false)
770 );
771
772 assert_eq!(
774 ch_to_arrow_type(&Type::Array(Box::new(Type::Int32)), options).unwrap(),
775 (
776 DataType::List(Arc::new(Field::new(LIST_ITEM_FIELD_NAME, DataType::Int32, false))),
777 false
778 )
779 );
780
781 assert_eq!(
783 ch_to_arrow_type(&Type::LowCardinality(Box::new(Type::String)), None).unwrap(),
784 (DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Binary)), false)
785 );
786
787 let tuple_type = Type::Tuple(vec![Type::Int32, Type::String]);
789 let expected_struct = DataType::Struct(
790 vec![
791 Field::new(format!("{TUPLE_FIELD_NAME_PREFIX}0"), DataType::Int32, false),
792 Field::new(format!("{TUPLE_FIELD_NAME_PREFIX}1"), DataType::Utf8, false),
793 ]
794 .into(),
795 );
796 assert_eq!(ch_to_arrow_type(&tuple_type, options).unwrap(), (expected_struct, false));
797
798 let map_type = Type::Map(Box::new(Type::String), Box::new(Type::Int32));
800 let expected_map = DataType::Map(
801 Arc::new(Field::new(
802 MAP_FIELD_NAME,
803 DataType::Struct(
804 vec![
805 Field::new(STRUCT_KEY_FIELD_NAME, DataType::Utf8, false),
806 Field::new(STRUCT_VALUE_FIELD_NAME, DataType::Int32, false),
807 ]
808 .into(),
809 ),
810 false,
811 )),
812 false,
813 );
814 assert_eq!(ch_to_arrow_type(&map_type, options).unwrap(), (expected_map, false));
815
816 assert_eq!(
818 ch_to_arrow_type(&Type::Nullable(Box::new(Type::Int32)), options).unwrap(),
819 (DataType::Int32, true)
820 );
821
822 assert!(ch_to_arrow_type(&Type::DateTime64(10, Tz::UTC), options).is_err());
824 }
825
826 #[test]
828 fn test_arrow_to_ch_type_nullable_map() {
829 let options = Some(ArrowOptions::default());
830 let struct_field = Arc::new(Field::new(
831 MAP_FIELD_NAME,
832 DataType::Struct(Fields::from(vec![
833 Field::new(STRUCT_KEY_FIELD_NAME, DataType::Utf8, false),
834 Field::new(STRUCT_VALUE_FIELD_NAME, DataType::Int32, true),
835 ])),
836 false,
837 ));
838 let map_type = DataType::Map(Arc::clone(&struct_field), false);
839
840 let ch_type = arrow_to_ch_type(&map_type, false, options).unwrap();
841 assert_eq!(
842 ch_type,
843 Type::Map(Box::new(Type::String), Box::new(Type::Nullable(Box::new(Type::Int32))))
844 );
845 }
846
847 #[test]
850 fn test_ch_to_arrow_type_nullable_map() {
851 let options = Some(ArrowOptions::default().with_strings_as_strings(true));
852 let ch_type = Type::Map(Box::new(Type::String), Box::new(Type::Int32));
853 let (arrow_type, is_nullable) = ch_to_arrow_type(&ch_type, options).unwrap();
854
855 let expected_struct_field = Arc::new(Field::new(
856 MAP_FIELD_NAME,
857 DataType::Struct(Fields::from(vec![
858 Field::new(STRUCT_KEY_FIELD_NAME, DataType::Utf8, false),
859 Field::new(STRUCT_VALUE_FIELD_NAME, DataType::Int32, false),
860 ])),
861 false,
862 ));
863 let expected_arrow_type = DataType::Map(Arc::clone(&expected_struct_field), false);
864
865 assert_eq!(arrow_type, expected_arrow_type);
866 assert!(!is_nullable);
867
868 let ch_type_nullable = Type::Nullable(Box::new(ch_type));
870 let (arrow_type_nullable, is_nullable_nullable) =
871 ch_to_arrow_type(&ch_type_nullable, options).unwrap();
872 assert_eq!(arrow_type_nullable, expected_arrow_type);
873 assert!(is_nullable_nullable);
874 }
875
876 #[test]
878 fn test_roundtrip_struct() {
879 let options = Some(ArrowOptions::default().with_strings_as_strings(true));
881 let ch_type = Type::Tuple(vec![Type::Nullable(Box::new(Type::Int32)), Type::String]);
882 let struct_type = DataType::Struct(Fields::from(vec![
883 Field::new(format!("{TUPLE_FIELD_NAME_PREFIX}0"), DataType::Int32, true),
884 Field::new(format!("{TUPLE_FIELD_NAME_PREFIX}1"), DataType::Utf8, false),
885 ]));
886
887 let (arrow_type, is_nullable) = ch_to_arrow_type(&ch_type, options).unwrap();
888 assert_eq!(arrow_type, struct_type.clone());
889 assert!(!is_nullable);
890
891 let ch_type_back = arrow_to_ch_type(&struct_type, false, options).unwrap();
892 assert_eq!(ch_type_back, ch_type);
893 }
894
895 #[test]
898 fn test_roundtrip_tuple() {
899 let options = Some(ArrowOptions::default().with_strings_as_strings(true));
900 let ch_type = Type::Tuple(vec![Type::Int32, Type::String]);
901
902 let expected_arrow_type = DataType::Struct(Fields::from(vec![
903 Field::new("field_0", DataType::Int32, false),
904 Field::new("field_1", DataType::Utf8, false),
905 ]));
906 let (arrow_type, is_nullable) = ch_to_arrow_type(&ch_type, options).unwrap();
907
908 assert_eq!(arrow_type, expected_arrow_type);
909 assert!(!is_nullable);
910
911 let ch_type_back = arrow_to_ch_type(&expected_arrow_type, false, options).unwrap();
912 assert_eq!(ch_type_back, ch_type);
913 }
914
915 #[test]
918 fn test_roundtrip_dictionary() {
919 let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
920 let field = Arc::new(Field::new("col", dict_type.clone(), false));
921 let nullable_dict_type =
922 DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
923
924 let ch_type = arrow_to_ch_type(&nullable_dict_type, field.is_nullable(), None).unwrap();
925 assert_eq!(ch_type, Type::LowCardinality(Box::new(Type::String)));
926
927 let ch_type_nullable = arrow_to_ch_type(&nullable_dict_type, true, None).unwrap();
929 assert_eq!(
930 ch_type_nullable,
931 Type::LowCardinality(Box::new(Type::Nullable(Box::new(Type::String))))
932 );
933
934 let ch_type_back = arrow_to_ch_type(&nullable_dict_type, false, None).unwrap();
935 assert_eq!(ch_type_back, ch_type);
936
937 let options_err = Some(ArrowOptions::default().with_strict_schema(true));
938 assert!(arrow_to_ch_type(&nullable_dict_type, true, options_err).is_err());
939 }
940
941 #[test]
944 fn test_roundtrip_nested_nullable_array() {
945 let ch_type =
946 Type::Array(Box::new(Type::Nullable(Box::new(Type::Array(Box::new(Type::Int32))))));
947 let expected_nullable_list_field = Arc::new(Field::new(
948 LIST_ITEM_FIELD_NAME,
949 DataType::List(Arc::new(Field::new(LIST_ITEM_FIELD_NAME, DataType::Int32, false))),
950 true,
951 ));
952 let expected_arrow_type = DataType::List(Arc::clone(&expected_nullable_list_field));
953
954 let (arrow_type, is_nullable) = ch_to_arrow_type(&ch_type, None).unwrap();
955 assert_eq!(arrow_type, expected_arrow_type);
956 assert!(!is_nullable);
957
958 let ch_type_nullable = Type::Nullable(Box::new(ch_type.clone()));
960 let (arrow_type_nullable, is_nullable_nullable) =
961 ch_to_arrow_type(&ch_type_nullable, None).unwrap();
962 assert_eq!(arrow_type_nullable, expected_arrow_type);
963 assert!(is_nullable_nullable);
964
965 assert!(
967 arrow_to_ch_type(
968 &expected_arrow_type,
969 true,
970 Some(
971 ArrowOptions::default()
972 .with_strict_schema(true)
973 .with_nullable_array_default_empty(false)
974 )
975 )
976 .is_err()
977 );
978
979 let ch_type_back = arrow_to_ch_type(&expected_arrow_type, false, None).unwrap();
982 let expected_back = Type::Array(Box::new(Type::Array(Box::new(Type::Int32))));
983 assert_eq!(ch_type_back, expected_back);
984 }
985
986 #[test]
988 fn test_roundtrip_low_cardinality_int32() {
989 let options_err = Some(ArrowOptions::default().with_strict_schema(true));
990 let ch_type = Type::LowCardinality(Box::new(Type::Int32));
991 let expected_arrow_type =
992 DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Int32));
993
994 let (arrow_type, is_nullable) = ch_to_arrow_type(&ch_type, None).unwrap();
995 assert_eq!(arrow_type, expected_arrow_type);
996 assert!(!is_nullable);
997
998 let ch_type_nullable = Type::Nullable(Box::new(ch_type.clone()));
999 assert!(ch_to_arrow_type(&ch_type_nullable, options_err).is_err());
1000
1001 let ch_type_back = arrow_to_ch_type(&expected_arrow_type, is_nullable, None).unwrap();
1002 assert_eq!(ch_type_back, ch_type);
1003
1004 assert!(arrow_to_ch_type(&expected_arrow_type, true, options_err).is_err());
1005 }
1006
#[test]
// Converts Nullable(LowCardinality(Nullable(String))) to Arrow and back with
// default options, pinning both the forward and the reverse result.
fn test_round_trip_low_cardinality_nullable() {
    // Build the nested source type from the inside out.
    let nullable_string = Type::Nullable(Box::new(Type::String));
    let low_card = Type::LowCardinality(Box::new(nullable_string));
    let source = Type::Nullable(Box::new(low_card));
    let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Binary));

    // Forward: a binary-valued dictionary that is reported as nullable.
    let (converted, nullable) = ch_to_arrow_type(&source, None).unwrap();
    assert_eq!(converted, dict_type);
    assert!(nullable);

    // Reverse: the outer Nullable is gone and the inner type comes back as Binary.
    let expected_back = Type::LowCardinality(Box::new(Type::Nullable(Box::new(Type::Binary))));
    let restored = arrow_to_ch_type(&converted, nullable, None).unwrap();
    assert_eq!(restored, expected_back);
}
1030
#[test]
#[expect(clippy::too_many_lines)]
// Exercises `schema_conversion` field by field: enum overrides on string-like
// columns, a date override, an override on an unsupported column type, a
// mismatched override, and the no-override defaults.
fn test_schema_conversion() {
    // Non-strict options shared by every call below.
    let options = Some(
        ArrowOptions::default()
            .with_strings_as_strings(true)
            .with_use_date32_for_date(true)
            .with_strict_schema(false),
    );

    // One Arrow field per scenario, bound to names by the pattern that follows.
    let fields = [
        Field::new("string_field", DataType::Utf8, false),
        Field::new("binary_field", DataType::Binary, false),
        Field::new("nullable_string_field", DataType::Utf8, true),
        Field::new(
            "nullable_dict_field",
            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
            true,
        ),
        Field::new(
            "nullable_dict_16_field",
            DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8)),
            true,
        ),
        Field::new("date_field", DataType::Date32, false),
        Field::new("int_field", DataType::Int32, false),
    ];
    let [
        string_field,
        binary_field,
        nullable_string_field,
        nullable_dict_field,
        nullable_dict_16_field,
        date_field,
        int_field,
    ] = &fields;

    // Per-column overrides, keyed by field name.
    let conversions = HashMap::from([
        (
            "string_field".to_string(),
            Type::Enum8(vec![("a".to_string(), 1), ("b".to_string(), 2)]),
        ),
        (
            "binary_field".to_string(),
            Type::Enum16(vec![("x".to_string(), 1), ("y".to_string(), 2)]),
        ),
        (
            "nullable_string_field".to_string(),
            Type::Enum8(vec![("a".to_string(), 1), ("b".to_string(), 2)]).into_nullable(),
        ),
        (
            "nullable_dict_field".to_string(),
            Type::Enum8(vec![("a".to_string(), 1), ("b".to_string(), 2)]).into_nullable(),
        ),
        (
            "nullable_dict_16_field".to_string(),
            Type::Enum16(vec![("x".to_string(), 1), ("y".to_string(), 2)]).into_nullable(),
        ),
        ("date_field".to_string(), Type::Date),
        (
            "int_field".to_string(),
            Type::Enum8(vec![("a".to_string(), 1), ("b".to_string(), 2)]),
        ),
    ]);

    // Utf8 column with an Enum8 override.
    let expected = Type::Enum8(vec![("a".to_string(), 1), ("b".to_string(), 2)]);
    let outcome = schema_conversion(string_field, Some(&conversions), options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);

    // Binary column with an Enum16 override.
    let expected = Type::Enum16(vec![("x".to_string(), 1), ("y".to_string(), 2)]);
    let outcome = schema_conversion(binary_field, Some(&conversions), options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);

    // Nullable Utf8 column: the enum comes back wrapped in Nullable.
    let expected =
        Type::Nullable(Box::new(Type::Enum8(vec![("a".to_string(), 1), ("b".to_string(), 2)])));
    let outcome = schema_conversion(nullable_string_field, Some(&conversions), options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);

    // Nullable Int8-keyed dictionary of Utf8: same Nullable(Enum8) result.
    let expected =
        Type::Nullable(Box::new(Type::Enum8(vec![("a".to_string(), 1), ("b".to_string(), 2)])));
    let outcome = schema_conversion(nullable_dict_field, Some(&conversions), options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);

    // Nullable Int16-keyed dictionary of Utf8 with an Enum16 override.
    let expected = Type::Nullable(Box::new(Type::Enum16(vec![
        ("x".to_string(), 1),
        ("y".to_string(), 2),
    ])));
    let outcome = schema_conversion(nullable_dict_16_field, Some(&conversions), options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);

    // Date32 column with a Date override.
    let expected = Type::Date;
    let outcome = schema_conversion(date_field, Some(&conversions), options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);

    // An enum override on an Int32 column is rejected with a descriptive message.
    let failure = schema_conversion(int_field, Some(&conversions), options);
    assert!(failure.is_err());
    assert_eq!(
        failure.unwrap_err().to_string(),
        "type conversion failure: expected LowCardinality(String) or String/Binary, found \
         Int32"
    );

    // With no override map the Utf8 column converts to String.
    let expected = Type::String;
    let outcome = schema_conversion(string_field, None, options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);

    // A Date override on a string column is rejected.
    let bad_conversions = HashMap::from([("string_field".to_string(), Type::Date)]);
    let failure = schema_conversion(string_field, Some(&bad_conversions), options);
    assert!(failure.is_err());
    assert_eq!(
        failure.unwrap_err().to_string(),
        "type conversion failure: expected Date or Date32, found String"
    );

    // With no override map and date32-for-date enabled, Date32 maps to Date32.
    let date32_options = options.map(|opts| opts.with_use_date32_for_date(true));
    let expected = Type::Date32;
    let outcome = schema_conversion(date_field, None, date32_options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);
}
1172}