1use std::str::FromStr;
2use std::sync::Arc;
3
4use arrow::datatypes::*;
5
6use crate::geo::normalize_geo_type;
7use crate::{ArrowOptions, Error, Result, Type};
8
/// Per-column type overrides, keyed by field name.
///
/// `schema_conversion` looks each field up here by name to decide whether the default
/// Arrow-to-ClickHouse mapping should be replaced.
pub type SchemaConversions = std::collections::HashMap<String, Type>;

/// Name of the item field of the Arrow `List` produced for a ClickHouse `Array`.
pub const LIST_ITEM_FIELD_NAME: &str = "item";
/// Prefix of the positional field names (`field_0`, `field_1`, ...) of the Arrow `Struct`
/// produced for a ClickHouse `Tuple`.
pub const TUPLE_FIELD_NAME_PREFIX: &str = "field_";
/// Name of the entries field of the Arrow `Map` produced for a ClickHouse `Map`.
pub const MAP_FIELD_NAME: &str = "entries";
/// Name of the key field inside a map's entries struct.
pub const STRUCT_KEY_FIELD_NAME: &str = "key";
/// Name of the value field inside a map's entries struct.
pub const STRUCT_VALUE_FIELD_NAME: &str = "value";
22
23impl From<i256> for crate::i256 {
25 fn from(arrow_i256: i256) -> Self {
26 let bytes = arrow_i256.to_be_bytes();
28 crate::i256(bytes)
29 }
30}
31
32impl From<crate::i256> for i256 {
33 fn from(value: crate::i256) -> Self {
34 i256::from_be_bytes(value.0)
36 }
37}
38
// Replaces a string-like type with an enum built by `$enum_typ($values)`.
//
// `$low_card` must be `String`/`Binary`, optionally nullable, or a `LowCardinality` whose
// values are `String`/`Binary`. Nullability (of the plain type, or of the low-cardinality
// values) is carried over to the resulting enum. Anything else makes the *enclosing function*
// return `Error::TypeConversion`, so the macro may only be used where `return Err(..)` is valid.
// `$values` is only evaluated once the source type has been accepted.
macro_rules! convert_to_enum {
    ($enum_typ:expr, $low_card:expr, $values:expr) => {{
        let source = &$low_card;
        let nullable = match source.strip_null() {
            $crate::Type::LowCardinality(dict_values)
                if matches!(
                    dict_values.strip_null(),
                    $crate::Type::String | $crate::Type::Binary
                ) =>
            {
                dict_values.is_nullable()
            }
            $crate::Type::LowCardinality(_) => {
                return Err($crate::Error::TypeConversion(format!(
                    "expected LowCardinality(String), found {}",
                    source
                )));
            }
            $crate::Type::String | $crate::Type::Binary => source.is_nullable(),
            _ => {
                return Err($crate::Error::TypeConversion(format!(
                    "expected LowCardinality(String) or String/Binary, found {}",
                    source
                )));
            }
        };

        let converted = $enum_typ($values);
        if nullable { converted.into_nullable() } else { converted }
    }};
}
69
70fn generate_schema_options(options: Option<ArrowOptions>) -> (ArrowOptions, ArrowOptions) {
72 let strict_options = options.map_or(ArrowOptions::strict(), ArrowOptions::into_strict_ddl);
74 let conversion_options =
77 options.unwrap_or(ArrowOptions::default().with_nullable_array_default_empty(false));
78 (strict_options, conversion_options)
79}
80
81pub(crate) fn schema_conversion(
82 field: &Field,
83 conversions: Option<&SchemaConversions>,
84 options: Option<ArrowOptions>,
85) -> Result<Type> {
86 let name = field.name();
87 let data_type = field.data_type();
88 let field_nullable = field.is_nullable();
89
90 let (strict_opts, conversion_opts) = generate_schema_options(options);
91 Ok(match conversions.and_then(|c| c.get(name)).map(Type::strip_null) {
93 Some(Type::Enum8(values)) => {
94 let type_ = arrow_to_ch_type(data_type, field_nullable, Some(conversion_opts))?;
95 convert_to_enum!(Type::Enum8, type_, values.clone())
96 }
97 Some(Type::Enum16(values)) => {
98 let type_ = arrow_to_ch_type(data_type, field_nullable, Some(conversion_opts))?;
99 convert_to_enum!(Type::Enum16, type_, values.clone())
100 }
101 Some(conv @ (Type::Date | Type::Date32)) => {
102 let type_ = arrow_to_ch_type(data_type, field_nullable, Some(conversion_opts))?;
103 if !matches!(type_, Type::Date | Type::Date32) {
104 return Err(Error::TypeConversion(format!(
105 "expected Date or Date32, found {type_}",
106 )));
107 }
108 conv.clone()
109 }
110 Some(conv @ (Type::Ring | Type::Point | Type::Polygon | Type::MultiPolygon)) => {
112 conv.clone()
113 }
114 _ => arrow_to_ch_type(data_type, field_nullable, Some(strict_opts))?,
115 })
116}
117
118#[expect(clippy::cast_sign_loss)]
137pub(crate) fn normalize_type(type_: &Type, arrow_type: &DataType) -> Option<Type> {
138 let nullable = type_.is_nullable();
139 let type_ = match (type_.strip_null(), arrow_type) {
140 (Type::String, DataType::Binary | DataType::BinaryView | DataType::LargeBinary) => {
141 Some(Type::Binary)
142 }
143 (Type::String | Type::FixedSizedString(_) | Type::Binary, DataType::FixedSizeBinary(n)) => {
144 Some(Type::FixedSizedBinary(*n as usize))
145 }
146 (Type::Binary, DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View) => {
147 Some(Type::String)
148 }
149 (Type::FixedSizedBinary(n), DataType::Utf8 | DataType::Utf8View) => {
150 Some(Type::FixedSizedString(*n))
151 }
152 (
153 Type::Array(inner),
154 DataType::List(inner_field)
155 | DataType::ListView(inner_field)
156 | DataType::LargeList(inner_field)
157 | DataType::LargeListView(inner_field),
158 ) => normalize_type(inner, inner_field.data_type()).map(Box::new).map(Type::Array),
159 (Type::LowCardinality(inner), DataType::Dictionary(_, value_type)) => {
160 normalize_type(inner, value_type).map(Box::new).map(Type::LowCardinality)
161 }
162 (
163 Type::LowCardinality(inner),
164 t @ (DataType::Utf8
165 | DataType::Utf8View
166 | DataType::LargeUtf8
167 | DataType::Binary
168 | DataType::LargeBinary
169 | DataType::BinaryView
170 | DataType::FixedSizeBinary(_)),
171 ) => normalize_type(inner, t).map(Box::new).map(Type::LowCardinality),
172 (Type::Tuple(inner), DataType::Struct(inner_fields)) => {
173 let mut deferred_vec: Option<Vec<Type>> = None;
174
175 for (i, (inner_type, field)) in inner.iter().zip(inner_fields.iter()).enumerate() {
176 if let Some(normalized_type) = normalize_type(inner_type, field.data_type()) {
177 if deferred_vec.is_none() {
179 let mut vec = Vec::with_capacity(inner.len());
180 vec.extend(inner[..i].iter().cloned());
181 deferred_vec = Some(vec);
182 }
183
184 deferred_vec.as_mut().unwrap().push(normalized_type);
186 } else if let Some(vec) = &mut deferred_vec {
187 vec.push(inner_type.clone());
189 }
190 }
191
192 deferred_vec.map(Type::Tuple)
193 }
194 _ => return None,
195 };
196
197 if nullable { type_.map(Type::into_nullable) } else { type_ }
198}
199
200#[expect(clippy::cast_sign_loss)]
204#[expect(clippy::too_many_lines)]
205pub(crate) fn arrow_to_ch_type(
206 data_type: &DataType,
207 mut is_nullable: bool,
208 options: Option<ArrowOptions>,
209) -> Result<Type> {
210 let tz_map = |tz: Option<&str>| {
211 tz.and_then(|s| chrono_tz::Tz::from_str(s).ok()).unwrap_or(chrono_tz::Tz::UTC)
212 };
213
214 let inner_type = match data_type {
216 DataType::Int8 => Type::Int8,
217 DataType::Int16 => Type::Int16,
218 DataType::Int32 => Type::Int32,
219 DataType::Int64 | DataType::Interval(_) => Type::Int64,
220 DataType::UInt8 | DataType::Boolean => Type::UInt8,
221 DataType::UInt16 => Type::UInt16,
222 DataType::UInt32 => Type::UInt32,
223 DataType::UInt64 => Type::UInt64,
224 DataType::Float32 => Type::Float32,
225 DataType::Float64 => Type::Float64,
226 DataType::Decimal128(p, s) => match *p {
227 p if p <= 9 => Type::Decimal32(*s as usize),
228 p if p <= 18 => Type::Decimal64(*s as usize),
229 p if p <= 38 => Type::Decimal128(*s as usize),
230 _ => Type::Decimal256(*s as usize), },
232 DataType::Decimal256(_, s) => Type::Decimal256(*s as usize),
233 DataType::Date32 if options.is_some_and(|o| o.use_date32_for_date) => Type::Date32 ,
235 DataType::Date32 => Type::Date,
236 DataType::Time32(TimeUnit::Second)
237 | DataType::Time64(TimeUnit::Second)
238 | DataType::Duration(TimeUnit::Second) => Type::DateTime(chrono_tz::Tz::UTC),
239 DataType::Date64
240 | DataType::Duration(TimeUnit::Millisecond)
241 | DataType::Time32(TimeUnit::Millisecond)
242 | DataType::Time64(TimeUnit::Millisecond) => Type::DateTime64(3, chrono_tz::Tz::UTC),
243 DataType::Time64(TimeUnit::Microsecond) | DataType::Duration(TimeUnit::Microsecond) => {
244 Type::DateTime64(6, chrono_tz::Tz::UTC)
245 }
246 DataType::Time64(TimeUnit::Nanosecond) | DataType::Duration(TimeUnit::Nanosecond) => {
247 Type::DateTime64(9, chrono_tz::Tz::UTC)
248 }
249 DataType::Timestamp(TimeUnit::Second, tz) => Type::DateTime(tz_map(Some(tz.as_deref().unwrap_or("UTC")))),
250 DataType::Timestamp(TimeUnit::Millisecond, tz) => {
251 Type::DateTime64(3, tz_map(Some(tz.as_deref().unwrap_or("UTC"))))
252 }
253 DataType::Timestamp(TimeUnit::Microsecond, tz) => {
254 Type::DateTime64(6, tz_map(Some(tz.as_deref().unwrap_or("UTC"))))
255 }
256 DataType::Timestamp(TimeUnit::Nanosecond, tz) => Type::DateTime64(9, tz_map(Some(tz.as_deref().unwrap_or("UTC")))),
257 DataType::Time32(TimeUnit::Nanosecond) => Type::DateTime64(9, chrono_tz::Tz::UTC),
258 DataType::FixedSizeBinary(s) => Type::FixedSizedBinary(*s as usize),
259 DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => Type::String,
260 DataType::List(f)
261 | DataType::LargeList(f)
262 | DataType::ListView(f)
263 | DataType::LargeListView(f)
264 | DataType::FixedSizeList(f, _) => {
265 if is_nullable && options.is_some_and(|o|
267 o.strict_schema && !o.nullable_array_default_empty
268 ) {
269 return Err(Error::TypeConversion(
270 "ClickHouse does not support nullable Lists".to_string(),
271 ));
272 }
273
274 Type::Array(Box::new(
275 arrow_to_ch_type(f.data_type(), f.is_nullable(), options)?
276 ))
277 }
278 DataType::Dictionary(_, value_type) => {
279 if is_nullable && options.is_some_and(|o| o.strict_schema) {
280 return Err(Error::TypeConversion(
281 "ClickHouse does not support nullable Dictionary".to_string(),
282 ));
283 }
284 let nullable = is_nullable;
287 is_nullable = false;
288 Type::LowCardinality(Box::new(arrow_to_ch_type(value_type, nullable, options)?))
289 }
290 DataType::Struct(fields) => {
291 let ch_types = fields
292 .iter()
293 .map(|f| arrow_to_ch_type(f.data_type(), f.is_nullable(), options))
294 .collect::<Result<_>>()?;
295 Type::Tuple(ch_types)
296 }
297 DataType::Map(key, _) => {
298 let DataType::Struct(inner) = key.data_type() else {
299 return Err(Error::ArrowDeserialize(format!(
300 "Unexpected key type for map: {key:?}"
301 )));
302 };
303
304 let (key_field, value_field) = if inner.len() >= 2 {
305 (&inner[0], &inner[1])
306 } else {
307 return Err(Error::ArrowDeserialize(
308 "Map inner fields malformed".into(),
309 ));
310 };
311
312 let key_type =
313 arrow_to_ch_type(key_field.data_type(), key_field.is_nullable(), options)?;
314 let value_type =
315 arrow_to_ch_type(value_field.data_type(), value_field.is_nullable(), options)?;
316
317 Type::Map(Box::new(key_type), Box::new(value_type))
318 }
319 DataType::Binary | DataType::LargeBinary | DataType::BinaryView => Type::Binary,
320 DataType::Time32(TimeUnit::Microsecond) => {
322 Type::DateTime64(6, chrono_tz::Tz::UTC)
324 }
325 DataType::Null
326 | DataType::Float16
327 | DataType::Union(_, _)
328 | DataType::RunEndEncoded(_, _) => {
330 return Err(Error::ArrowUnsupportedType(format!(
331 "Arrow data type is not supported: {data_type:?}"
332 )));
333 }
334 };
335
336 Ok(if is_nullable && !matches!(inner_type, Type::Array(_) | Type::Map(_, _)) {
338 Type::Nullable(Box::new(inner_type))
339 } else {
340 inner_type
341 })
342}
343
344#[expect(clippy::too_many_lines)]
359#[expect(clippy::cast_possible_truncation)]
360#[expect(clippy::cast_possible_wrap)]
361pub fn ch_to_arrow_type(ch_type: &Type, options: Option<ArrowOptions>) -> Result<(DataType, bool)> {
362 let mut is_null = ch_type.is_nullable();
363 let inner_type = ch_type.strip_null();
364
365 let arrow_type = match inner_type {
367 Type::Int8 => DataType::Int8,
369 Type::Int16 => DataType::Int16,
370 Type::Int32 => DataType::Int32,
371 Type::Int64 => DataType::Int64,
372 Type::UInt8 => DataType::UInt8,
373 Type::UInt16 => DataType::UInt16,
374 Type::UInt32 => DataType::UInt32,
375 Type::UInt64 => DataType::UInt64,
376 Type::Int128 | Type::UInt128 | Type::Ipv6 | Type::Uuid => DataType::FixedSizeBinary(16),
377 Type::Int256 | Type::UInt256 => DataType::FixedSizeBinary(32),
378 Type::Float32 => DataType::Float32,
379 Type::Float64 => DataType::Float64,
380 Type::Decimal32(s) => DataType::Decimal128(9, *s as i8),
381 Type::Decimal64(s) => DataType::Decimal128(18, *s as i8),
382 Type::Decimal128(s) => DataType::Decimal128(38, *s as i8),
383 Type::Decimal256(s) => DataType::Decimal256(76, *s as i8),
384 Type::String => {
385 if options.is_some_and(|o| o.strings_as_strings) {
386 DataType::Utf8
387 } else {
388 DataType::Binary
389 }
390 }
391 Type::FixedSizedString(len) | Type::FixedSizedBinary(len) => {
392 DataType::FixedSizeBinary(*len as i32)
393 }
394 Type::Binary => DataType::Binary,
395 Type::Object => DataType::Utf8,
396 Type::Date32 | Type::Date => DataType::Date32,
397 Type::DateTime(tz) => DataType::Timestamp(TimeUnit::Second, Some(Arc::from(tz.name()))),
398 Type::DateTime64(p, tz) => match p {
399 0 => DataType::Timestamp(TimeUnit::Second, Some(Arc::from(tz.name()))),
400 1..=3 => DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from(tz.name()))),
401 4..=6 => DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from(tz.name()))),
402 7..=9 => DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from(tz.name()))),
403 _ => {
404 return Err(Error::ArrowUnsupportedType(format!(
405 "DateTime64 precision must be 0-9, received {p}"
406 )));
407 }
408 },
409 Type::Ipv4 => DataType::FixedSizeBinary(4),
410 Type::Array(inner_type) => {
411 if is_null
412 && options.is_some_and(|o| o.strict_schema && !o.nullable_array_default_empty)
413 {
414 return Err(Error::TypeConversion(
415 "ClickHouse does not support nullable Arrays".to_string(),
416 ));
417 }
418 let (inner_arrow_type, is_null) = ch_to_arrow_type(inner_type, options)?;
419 DataType::List(Arc::new(Field::new(LIST_ITEM_FIELD_NAME, inner_arrow_type, is_null)))
420 }
421 Type::Tuple(types) => {
422 let fields: Vec<Field> = types
423 .iter()
424 .enumerate()
425 .map(|(i, t)| {
426 ch_to_arrow_type(t, options).map(|(arrow_type, is_null)| {
427 Field::new(format!("{TUPLE_FIELD_NAME_PREFIX}{i}"), arrow_type, is_null)
428 })
429 })
430 .collect::<Result<Vec<_>>>()?;
431 DataType::Struct(fields.into())
432 }
433 Type::Map(key_type, value_type) => {
434 let (key_arrow_type, _) = ch_to_arrow_type(key_type, options)?;
435 let (value_arrow_type, is_null) = ch_to_arrow_type(value_type, options)?;
436 DataType::Map(
437 Arc::new(Field::new(
438 MAP_FIELD_NAME,
439 DataType::Struct(
440 vec![
441 Field::new(STRUCT_KEY_FIELD_NAME, key_arrow_type, false),
442 Field::new(STRUCT_VALUE_FIELD_NAME, value_arrow_type, is_null),
443 ]
444 .into(),
445 ),
446 false,
447 )),
448 false,
449 )
450 }
451 Type::LowCardinality(inner_type) => {
452 if is_null && options.is_some_and(|o| o.strict_schema) {
453 return Err(Error::TypeConversion(
454 "ClickHouse does not support nullable LowCardinality".to_string(),
455 ));
456 }
457
458 is_null = inner_type.is_nullable();
460
461 DataType::Dictionary(
462 Box::new(DataType::Int32),
463 Box::new(ch_to_arrow_type(inner_type, options)?.0),
464 )
465 }
466 Type::Enum8(_) => DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
467 Type::Enum16(_) => {
468 DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8))
469 }
470 Type::Point | Type::Ring | Type::Polygon | Type::MultiPolygon => {
471 let normalized = normalize_geo_type(ch_type).unwrap();
473 return ch_to_arrow_type(&normalized, options);
474 }
475 Type::Nullable(_) => unreachable!(),
477 };
478
479 Ok((arrow_type, is_null))
480}
481
482#[cfg(test)]
483mod tests {
484 use std::collections::HashMap;
485 use std::sync::Arc;
486
487 use arrow::datatypes::{DataType, Field, TimeUnit};
488 use chrono_tz::Tz;
489
490 use super::*;
491
492 #[test]
493 fn test_i256_conversions() {
494 let arrow_i256 = i256::from_i128(123_456_789);
496 let ch_i256: crate::i256 = arrow_i256.into();
497 let back_to_arrow: i256 = ch_i256.into();
498 assert_eq!(arrow_i256, back_to_arrow);
499
500 let arrow_i256 = i256::from_i128(0);
502 let ch_i256: crate::i256 = arrow_i256.into();
503 let back_to_arrow: i256 = ch_i256.into();
504 assert_eq!(arrow_i256, back_to_arrow);
505
506 let arrow_i256 = i256::from_i128(-987_654_321);
508 let ch_i256: crate::i256 = arrow_i256.into();
509 let back_to_arrow: i256 = ch_i256.into();
510 assert_eq!(arrow_i256, back_to_arrow);
511 }
512
513 #[test]
514 fn test_normalize_type() {
515 assert_eq!(normalize_type(&Type::String, &DataType::Binary), Some(Type::Binary));
517 assert_eq!(normalize_type(&Type::Binary, &DataType::Utf8), Some(Type::String));
518 assert_eq!(
519 normalize_type(&Type::FixedSizedBinary(4), &DataType::Utf8),
520 Some(Type::FixedSizedString(4))
521 );
522 assert_eq!(
523 normalize_type(&Type::String, &DataType::FixedSizeBinary(8)),
524 Some(Type::FixedSizedBinary(8))
525 );
526
527 let arrow_list =
529 DataType::List(Arc::new(Field::new(LIST_ITEM_FIELD_NAME, DataType::Binary, false)));
530 assert_eq!(
531 normalize_type(&Type::Array(Box::new(Type::String)), &arrow_list),
532 Some(Type::Array(Box::new(Type::Binary)))
533 );
534
535 let arrow_dict = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
537 assert_eq!(
538 normalize_type(&Type::LowCardinality(Box::new(Type::Binary)), &arrow_dict),
539 Some(Type::LowCardinality(Box::new(Type::String)))
540 );
541
542 let arrow_dict = DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
543 assert_eq!(
544 normalize_type(
545 &(Type::LowCardinality(Box::new(Type::Binary)).into_nullable()),
546 &arrow_dict
547 ),
548 Some(Type::LowCardinality(Box::new(Type::String)).into_nullable())
549 );
550
551 assert_eq!(
553 normalize_type(&Type::Nullable(Box::new(Type::String)), &DataType::Binary),
554 Some(Type::Nullable(Box::new(Type::Binary)))
555 );
556
557 assert_eq!(normalize_type(&Type::Int32, &DataType::Int32), None);
559
560 assert_eq!(normalize_type(&Type::Int32, &DataType::Float64), None);
562 }
563
564 #[test]
565 #[expect(clippy::too_many_lines)]
566 fn test_arrow_to_ch_type() {
567 assert_eq!(arrow_to_ch_type(&DataType::Int8, false, None).unwrap(), Type::Int8);
569 assert_eq!(arrow_to_ch_type(&DataType::UInt8, false, None).unwrap(), Type::UInt8);
570 assert_eq!(arrow_to_ch_type(&DataType::Float64, false, None).unwrap(), Type::Float64);
571
572 assert_eq!(
574 arrow_to_ch_type(&DataType::Decimal128(9, 2), false, None).unwrap(),
575 Type::Decimal32(2)
576 );
577 assert_eq!(
578 arrow_to_ch_type(&DataType::Decimal128(18, 4), false, None).unwrap(),
579 Type::Decimal64(4)
580 );
581 assert_eq!(
582 arrow_to_ch_type(&DataType::Decimal256(76, 6), false, None).unwrap(),
583 Type::Decimal256(6)
584 );
585
586 assert_eq!(arrow_to_ch_type(&DataType::Date32, false, None).unwrap(), Type::Date);
588 let datetimes = [
589 arrow_to_ch_type(&DataType::Time32(TimeUnit::Second), false, None).unwrap(),
590 arrow_to_ch_type(&DataType::Time64(TimeUnit::Second), false, None).unwrap(),
591 arrow_to_ch_type(&DataType::Duration(TimeUnit::Second), false, None).unwrap(),
592 ];
593 for dt in datetimes {
594 assert_eq!(dt, Type::DateTime(Tz::UTC));
595 }
596
597 let datetimes = [
598 arrow_to_ch_type(&DataType::Date64, false, None).unwrap(),
599 arrow_to_ch_type(&DataType::Duration(TimeUnit::Millisecond), false, None).unwrap(),
600 arrow_to_ch_type(&DataType::Time32(TimeUnit::Millisecond), false, None).unwrap(),
601 arrow_to_ch_type(&DataType::Time64(TimeUnit::Millisecond), false, None).unwrap(),
602 ];
603 for dt in datetimes {
604 assert_eq!(dt, Type::DateTime64(3, Tz::UTC));
605 }
606 let datetimes = [
607 arrow_to_ch_type(&DataType::Duration(TimeUnit::Microsecond), false, None).unwrap(),
608 arrow_to_ch_type(&DataType::Time64(TimeUnit::Microsecond), false, None).unwrap(),
609 ];
610 for dt in datetimes {
611 assert_eq!(dt, Type::DateTime64(6, Tz::UTC));
612 }
613 let datetimes = [
614 arrow_to_ch_type(&DataType::Duration(TimeUnit::Nanosecond), false, None).unwrap(),
615 arrow_to_ch_type(&DataType::Time32(TimeUnit::Nanosecond), false, None).unwrap(),
616 arrow_to_ch_type(&DataType::Time64(TimeUnit::Nanosecond), false, None).unwrap(),
617 ];
618 for dt in datetimes {
619 assert_eq!(dt, Type::DateTime64(9, Tz::UTC));
620 }
621 assert_eq!(
622 arrow_to_ch_type(
623 &DataType::Timestamp(TimeUnit::Second, Some(Arc::from("America/New_York"))),
624 false,
625 None
626 )
627 .unwrap(),
628 Type::DateTime(Tz::America__New_York)
629 );
630 assert_eq!(
631 arrow_to_ch_type(
632 &DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from("America/New_York"))),
633 false,
634 None
635 )
636 .unwrap(),
637 Type::DateTime64(3, Tz::America__New_York)
638 );
639 assert_eq!(
640 arrow_to_ch_type(
641 &DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from("America/New_York"))),
642 false,
643 None
644 )
645 .unwrap(),
646 Type::DateTime64(6, Tz::America__New_York)
647 );
648 assert_eq!(
649 arrow_to_ch_type(
650 &DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("America/New_York"))),
651 false,
652 None
653 )
654 .unwrap(),
655 Type::DateTime64(9, Tz::America__New_York)
656 );
657
658 let strings_types = [DataType::Utf8, DataType::Utf8View, DataType::LargeUtf8];
660 for s in strings_types {
661 assert_eq!(arrow_to_ch_type(&s, false, None).unwrap(), Type::String);
662 }
663
664 let binary_types = [DataType::Binary, DataType::BinaryView, DataType::LargeBinary];
665 for s in binary_types {
666 assert_eq!(arrow_to_ch_type(&s, false, None).unwrap(), Type::Binary);
667 }
668 assert_eq!(
669 arrow_to_ch_type(&DataType::FixedSizeBinary(4), false, None).unwrap(),
670 Type::FixedSizedBinary(4)
671 );
672
673 let list_field = Arc::new(Field::new(LIST_ITEM_FIELD_NAME, DataType::Int32, false));
675 let list_types = [
676 DataType::List(Arc::clone(&list_field)),
677 DataType::ListView(Arc::clone(&list_field)),
678 DataType::LargeList(list_field),
679 ];
680 for l in list_types {
681 assert_eq!(
682 arrow_to_ch_type(&l, false, None).unwrap(),
683 Type::Array(Box::new(Type::Int32))
684 );
685 }
686
687 let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
689 assert_eq!(
690 arrow_to_ch_type(&dict_type, false, None).unwrap(),
691 Type::LowCardinality(Box::new(Type::String))
692 );
693
694 assert_eq!(
696 arrow_to_ch_type(&DataType::Int32, true, None).unwrap(),
697 Type::Nullable(Box::new(Type::Int32))
698 );
699
700 assert!(arrow_to_ch_type(&DataType::Null, false, None).is_err());
702 assert!(arrow_to_ch_type(&DataType::Float16, false, None).is_err());
703 assert!(
704 arrow_to_ch_type(
705 &DataType::RunEndEncoded(
706 Field::new("", DataType::Int32, false).into(),
707 Field::new("", DataType::Utf8, false).into()
708 ),
709 false,
710 None
711 )
712 .is_err()
713 );
714 }
715
716 #[test]
717 fn test_ch_to_arrow_type() {
718 let options = Some(ArrowOptions::default().with_strings_as_strings(true));
719
720 assert_eq!(ch_to_arrow_type(&Type::Int8, options).unwrap(), (DataType::Int8, false));
722 assert_eq!(ch_to_arrow_type(&Type::UInt8, options).unwrap(), (DataType::UInt8, false));
723 assert_eq!(ch_to_arrow_type(&Type::Float64, options).unwrap(), (DataType::Float64, false));
724
725 assert_eq!(
727 ch_to_arrow_type(&Type::Decimal32(2), options).unwrap(),
728 (DataType::Decimal128(9, 2), false)
729 );
730 assert_eq!(
731 ch_to_arrow_type(&Type::Decimal256(6), options).unwrap(),
732 (DataType::Decimal256(76, 6), false)
733 );
734
735 assert_eq!(
737 ch_to_arrow_type(&Type::DateTime(Tz::UTC), options).unwrap(),
738 (DataType::Timestamp(TimeUnit::Second, Some(Arc::from("UTC"))), false)
739 );
740 assert_eq!(
741 ch_to_arrow_type(&Type::DateTime64(6, Tz::America__New_York), options).unwrap(),
742 (
743 DataType::Timestamp(TimeUnit::Microsecond, Some(Arc::from("America/New_York"))),
744 false
745 )
746 );
747
748 assert_eq!(ch_to_arrow_type(&Type::String, options).unwrap(), (DataType::Utf8, false));
750 assert_eq!(
751 ch_to_arrow_type(&Type::FixedSizedString(4), options).unwrap(),
752 (DataType::FixedSizeBinary(4), false)
753 );
754 assert_eq!(
755 ch_to_arrow_type(&Type::FixedSizedBinary(4), options).unwrap(),
756 (DataType::FixedSizeBinary(4), false)
757 );
758
759 assert_eq!(ch_to_arrow_type(&Type::String, None).unwrap(), (DataType::Binary, false));
761 assert_eq!(
763 ch_to_arrow_type(&Type::FixedSizedString(4), None).unwrap(),
764 (DataType::FixedSizeBinary(4), false)
765 );
766
767 assert_eq!(
769 ch_to_arrow_type(&Type::Array(Box::new(Type::Int32)), options).unwrap(),
770 (
771 DataType::List(Arc::new(Field::new(LIST_ITEM_FIELD_NAME, DataType::Int32, false))),
772 false
773 )
774 );
775
776 assert_eq!(
778 ch_to_arrow_type(&Type::LowCardinality(Box::new(Type::String)), None).unwrap(),
779 (DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Binary)), false)
780 );
781
782 let tuple_type = Type::Tuple(vec![Type::Int32, Type::String]);
784 let expected_struct = DataType::Struct(
785 vec![
786 Field::new(format!("{TUPLE_FIELD_NAME_PREFIX}0"), DataType::Int32, false),
787 Field::new(format!("{TUPLE_FIELD_NAME_PREFIX}1"), DataType::Utf8, false),
788 ]
789 .into(),
790 );
791 assert_eq!(ch_to_arrow_type(&tuple_type, options).unwrap(), (expected_struct, false));
792
793 let map_type = Type::Map(Box::new(Type::String), Box::new(Type::Int32));
795 let expected_map = DataType::Map(
796 Arc::new(Field::new(
797 MAP_FIELD_NAME,
798 DataType::Struct(
799 vec![
800 Field::new(STRUCT_KEY_FIELD_NAME, DataType::Utf8, false),
801 Field::new(STRUCT_VALUE_FIELD_NAME, DataType::Int32, false),
802 ]
803 .into(),
804 ),
805 false,
806 )),
807 false,
808 );
809 assert_eq!(ch_to_arrow_type(&map_type, options).unwrap(), (expected_map, false));
810
811 assert_eq!(
813 ch_to_arrow_type(&Type::Nullable(Box::new(Type::Int32)), options).unwrap(),
814 (DataType::Int32, true)
815 );
816
817 assert!(ch_to_arrow_type(&Type::DateTime64(10, Tz::UTC), options).is_err());
819 }
820
821 #[test]
823 fn test_arrow_to_ch_type_nullable_map() {
824 let options = Some(ArrowOptions::default());
825 let struct_field = Arc::new(Field::new(
826 MAP_FIELD_NAME,
827 DataType::Struct(Fields::from(vec![
828 Field::new(STRUCT_KEY_FIELD_NAME, DataType::Utf8, false),
829 Field::new(STRUCT_VALUE_FIELD_NAME, DataType::Int32, true),
830 ])),
831 false,
832 ));
833 let map_type = DataType::Map(Arc::clone(&struct_field), false);
834
835 let ch_type = arrow_to_ch_type(&map_type, false, options).unwrap();
836 assert_eq!(
837 ch_type,
838 Type::Map(Box::new(Type::String), Box::new(Type::Nullable(Box::new(Type::Int32))))
839 );
840 }
841
842 #[test]
845 fn test_ch_to_arrow_type_nullable_map() {
846 let options = Some(ArrowOptions::default().with_strings_as_strings(true));
847 let ch_type = Type::Map(Box::new(Type::String), Box::new(Type::Int32));
848 let (arrow_type, is_nullable) = ch_to_arrow_type(&ch_type, options).unwrap();
849
850 let expected_struct_field = Arc::new(Field::new(
851 MAP_FIELD_NAME,
852 DataType::Struct(Fields::from(vec![
853 Field::new(STRUCT_KEY_FIELD_NAME, DataType::Utf8, false),
854 Field::new(STRUCT_VALUE_FIELD_NAME, DataType::Int32, false),
855 ])),
856 false,
857 ));
858 let expected_arrow_type = DataType::Map(Arc::clone(&expected_struct_field), false);
859
860 assert_eq!(arrow_type, expected_arrow_type);
861 assert!(!is_nullable);
862
863 let ch_type_nullable = Type::Nullable(Box::new(ch_type));
865 let (arrow_type_nullable, is_nullable_nullable) =
866 ch_to_arrow_type(&ch_type_nullable, options).unwrap();
867 assert_eq!(arrow_type_nullable, expected_arrow_type);
868 assert!(is_nullable_nullable);
869 }
870
871 #[test]
873 fn test_roundtrip_struct() {
874 let options = Some(ArrowOptions::default().with_strings_as_strings(true));
876 let ch_type = Type::Tuple(vec![Type::Nullable(Box::new(Type::Int32)), Type::String]);
877 let struct_type = DataType::Struct(Fields::from(vec![
878 Field::new(format!("{TUPLE_FIELD_NAME_PREFIX}0"), DataType::Int32, true),
879 Field::new(format!("{TUPLE_FIELD_NAME_PREFIX}1"), DataType::Utf8, false),
880 ]));
881
882 let (arrow_type, is_nullable) = ch_to_arrow_type(&ch_type, options).unwrap();
883 assert_eq!(arrow_type, struct_type.clone());
884 assert!(!is_nullable);
885
886 let ch_type_back = arrow_to_ch_type(&struct_type, false, options).unwrap();
887 assert_eq!(ch_type_back, ch_type);
888 }
889
890 #[test]
893 fn test_roundtrip_tuple() {
894 let options = Some(ArrowOptions::default().with_strings_as_strings(true));
895 let ch_type = Type::Tuple(vec![Type::Int32, Type::String]);
896
897 let expected_arrow_type = DataType::Struct(Fields::from(vec![
898 Field::new("field_0", DataType::Int32, false),
899 Field::new("field_1", DataType::Utf8, false),
900 ]));
901 let (arrow_type, is_nullable) = ch_to_arrow_type(&ch_type, options).unwrap();
902
903 assert_eq!(arrow_type, expected_arrow_type);
904 assert!(!is_nullable);
905
906 let ch_type_back = arrow_to_ch_type(&expected_arrow_type, false, options).unwrap();
907 assert_eq!(ch_type_back, ch_type);
908 }
909
910 #[test]
913 fn test_roundtrip_dictionary() {
914 let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
915 let field = Arc::new(Field::new("col", dict_type.clone(), false));
916 let nullable_dict_type =
917 DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
918
919 let ch_type = arrow_to_ch_type(&nullable_dict_type, field.is_nullable(), None).unwrap();
920 assert_eq!(ch_type, Type::LowCardinality(Box::new(Type::String)));
921
922 let ch_type_nullable = arrow_to_ch_type(&nullable_dict_type, true, None).unwrap();
924 assert_eq!(
925 ch_type_nullable,
926 Type::LowCardinality(Box::new(Type::Nullable(Box::new(Type::String))))
927 );
928
929 let ch_type_back = arrow_to_ch_type(&nullable_dict_type, false, None).unwrap();
930 assert_eq!(ch_type_back, ch_type);
931
932 let options_err = Some(ArrowOptions::default().with_strict_schema(true));
933 assert!(arrow_to_ch_type(&nullable_dict_type, true, options_err).is_err());
934 }
935
936 #[test]
939 fn test_roundtrip_nested_nullable_array() {
940 let ch_type =
941 Type::Array(Box::new(Type::Nullable(Box::new(Type::Array(Box::new(Type::Int32))))));
942 let expected_nullable_list_field = Arc::new(Field::new(
943 LIST_ITEM_FIELD_NAME,
944 DataType::List(Arc::new(Field::new(LIST_ITEM_FIELD_NAME, DataType::Int32, false))),
945 true,
946 ));
947 let expected_arrow_type = DataType::List(Arc::clone(&expected_nullable_list_field));
948
949 let (arrow_type, is_nullable) = ch_to_arrow_type(&ch_type, None).unwrap();
950 assert_eq!(arrow_type, expected_arrow_type);
951 assert!(!is_nullable);
952
953 let ch_type_nullable = Type::Nullable(Box::new(ch_type.clone()));
955 let (arrow_type_nullable, is_nullable_nullable) =
956 ch_to_arrow_type(&ch_type_nullable, None).unwrap();
957 assert_eq!(arrow_type_nullable, expected_arrow_type);
958 assert!(is_nullable_nullable);
959
960 assert!(
962 arrow_to_ch_type(
963 &expected_arrow_type,
964 true,
965 Some(
966 ArrowOptions::default()
967 .with_strict_schema(true)
968 .with_nullable_array_default_empty(false)
969 )
970 )
971 .is_err()
972 );
973
974 let ch_type_back = arrow_to_ch_type(&expected_arrow_type, false, None).unwrap();
977 let expected_back = Type::Array(Box::new(Type::Array(Box::new(Type::Int32))));
978 assert_eq!(ch_type_back, expected_back);
979 }
980
981 #[test]
983 fn test_roundtrip_low_cardinality_int32() {
984 let options_err = Some(ArrowOptions::default().with_strict_schema(true));
985 let ch_type = Type::LowCardinality(Box::new(Type::Int32));
986 let expected_arrow_type =
987 DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Int32));
988
989 let (arrow_type, is_nullable) = ch_to_arrow_type(&ch_type, None).unwrap();
990 assert_eq!(arrow_type, expected_arrow_type);
991 assert!(!is_nullable);
992
993 let ch_type_nullable = Type::Nullable(Box::new(ch_type.clone()));
994 assert!(ch_to_arrow_type(&ch_type_nullable, options_err).is_err());
995
996 let ch_type_back = arrow_to_ch_type(&expected_arrow_type, is_nullable, None).unwrap();
997 assert_eq!(ch_type_back, ch_type);
998
999 assert!(arrow_to_ch_type(&expected_arrow_type, true, options_err).is_err());
1000 }
1001
/// Converts `Nullable(LowCardinality(Nullable(String)))` to Arrow and back with default options,
/// pinning the dictionary type produced and the ClickHouse type recovered from it.
#[test]
fn test_round_trip_low_cardinality_nullable() {
    // Build the nested ClickHouse type from the inside out.
    let nullable_string = Type::Nullable(Box::new(Type::String));
    let low_cardinality = Type::LowCardinality(Box::new(nullable_string));
    let source_type = Type::Nullable(Box::new(low_cardinality));
    let dictionary_type =
        DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Binary));

    // ClickHouse -> Arrow: the field itself is reported as nullable.
    let (converted, nullable) = ch_to_arrow_type(&source_type, None).unwrap();
    assert_eq!(converted, dictionary_type);
    assert!(nullable);

    // Arrow -> ClickHouse: the result is compared against `LowCardinality(Nullable(Binary))`.
    let restored = arrow_to_ch_type(&converted, nullable, None).unwrap();
    let nullable_binary = Type::Nullable(Box::new(Type::Binary));
    let expected_back = Type::LowCardinality(Box::new(nullable_binary));
    assert_eq!(restored, expected_back);
}
1025
/// Exercises `schema_conversion` with and without per-column type overrides.
///
/// Covered cases: string and binary columns overridden to enums (plain, nullable, and
/// dictionary-encoded), a `Date` override on a `Date32` column, an enum override on an `Int32`
/// column (expected to fail), conversion with no overrides, and a `Date` override on a string
/// column (expected to fail).
#[test]
#[expect(clippy::too_many_lines)]
fn test_schema_conversion() {
    let options = Some(
        ArrowOptions::default()
            .with_strings_as_strings(true)
            .with_use_date32_for_date(true)
            .with_strict_schema(false),
    );

    // Arrow-side columns under test.
    let fields = [
        Field::new("string_field", DataType::Utf8, false),
        Field::new("binary_field", DataType::Binary, false),
        Field::new("nullable_string_field", DataType::Utf8, true),
        Field::new(
            "nullable_dict_field",
            DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)),
            true,
        ),
        Field::new(
            "nullable_dict_16_field",
            DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8)),
            true,
        ),
        Field::new("date_field", DataType::Date32, false),
        Field::new("int_field", DataType::Int32, false),
    ];
    let string_field = &fields[0];
    let binary_field = &fields[1];
    let nullable_string_field = &fields[2];
    let nullable_dict_field = &fields[3];
    let nullable_dict_16_field = &fields[4];
    let date_field = &fields[5];
    let int_field = &fields[6];

    // Requested ClickHouse types, keyed by column name.
    let overrides = HashMap::from([
        (
            "string_field".to_string(),
            Type::Enum8(vec![("a".to_string(), 1), ("b".to_string(), 2)]),
        ),
        (
            "binary_field".to_string(),
            Type::Enum16(vec![("x".to_string(), 1), ("y".to_string(), 2)]),
        ),
        (
            "nullable_string_field".to_string(),
            Type::Enum8(vec![("a".to_string(), 1), ("b".to_string(), 2)]).into_nullable(),
        ),
        (
            "nullable_dict_field".to_string(),
            Type::Enum8(vec![("a".to_string(), 1), ("b".to_string(), 2)]).into_nullable(),
        ),
        (
            "nullable_dict_16_field".to_string(),
            Type::Enum16(vec![("x".to_string(), 1), ("y".to_string(), 2)]).into_nullable(),
        ),
        ("date_field".to_string(), Type::Date),
        (
            "int_field".to_string(),
            Type::Enum8(vec![("a".to_string(), 1), ("b".to_string(), 2)]),
        ),
    ]);

    // Utf8 column overridden to Enum8.
    let expected = Type::Enum8(vec![("a".to_string(), 1), ("b".to_string(), 2)]);
    let outcome = schema_conversion(string_field, Some(&overrides), options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);

    // Binary column overridden to Enum16.
    let expected = Type::Enum16(vec![("x".to_string(), 1), ("y".to_string(), 2)]);
    let outcome = schema_conversion(binary_field, Some(&overrides), options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);

    // Nullable Utf8 column overridden to a nullable Enum8.
    let expected =
        Type::Nullable(Box::new(Type::Enum8(vec![("a".to_string(), 1), ("b".to_string(), 2)])));
    let outcome = schema_conversion(nullable_string_field, Some(&overrides), options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);

    // Nullable Int8-keyed dictionary column overridden to a nullable Enum8.
    let expected =
        Type::Nullable(Box::new(Type::Enum8(vec![("a".to_string(), 1), ("b".to_string(), 2)])));
    let outcome = schema_conversion(nullable_dict_field, Some(&overrides), options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);

    // Nullable Int16-keyed dictionary column overridden to a nullable Enum16.
    let expected = Type::Nullable(Box::new(Type::Enum16(vec![
        ("x".to_string(), 1),
        ("y".to_string(), 2),
    ])));
    let outcome = schema_conversion(nullable_dict_16_field, Some(&overrides), options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);

    // Date32 column overridden to Date.
    let expected = Type::Date;
    let outcome = schema_conversion(date_field, Some(&overrides), options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);

    // An enum override on an Int32 column is expected to be rejected.
    let outcome = schema_conversion(int_field, Some(&overrides), options);
    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert_eq!(
        message,
        "type conversion failure: expected LowCardinality(String) or String/Binary, found \
         Int32"
    );

    // Without overrides the Utf8 column is expected to come back as String.
    let expected = Type::String;
    let outcome = schema_conversion(string_field, None, options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);

    // A Date override on a Utf8 column is expected to be rejected.
    let bad_overrides = HashMap::from([("string_field".to_string(), Type::Date)]);
    let outcome = schema_conversion(string_field, Some(&bad_overrides), options);
    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert_eq!(message, "type conversion failure: expected Date or Date32, found String");

    // Without overrides and with `use_date32_for_date` set, Date32 is expected back.
    let date32_options = options.map(|opts| opts.with_use_date32_for_date(true));
    let expected = Type::Date32;
    let outcome = schema_conversion(date_field, None, date32_options);
    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap(), expected);
}
1167}