datafusion_common/
datatype.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! [`DataTypeExt`] and [`FieldExt`] extension traits for converting DataTypes to Fields
19
20use crate::arrow::datatypes::{DataType, Field, FieldRef};
21use std::sync::Arc;
22
/// DataFusion extension methods for Arrow [`DataType`]
///
/// Both methods consume the [`DataType`] and wrap it in a nameless,
/// nullable field.
pub trait DataTypeExt {
    /// Convert the type to field with nullable type and "" name
    ///
    /// This is used to track the places where we convert a [`DataType`]
    /// into a nameless field to interact with an API that is
    /// capable of representing an extension type and/or nullability.
    ///
    /// For example, it will convert a `DataType::Int32` into
    /// `Field::new("", DataType::Int32, true)`.
    ///
    /// ```
    /// # use datafusion_common::datatype::DataTypeExt;
    /// # use arrow::datatypes::DataType;
    /// let dt = DataType::Utf8;
    /// let field = dt.into_nullable_field();
    /// // result is a nullable Utf8 field with "" name
    /// assert_eq!(field.name(), "");
    /// assert_eq!(field.data_type(), &DataType::Utf8);
    /// assert!(field.is_nullable());
    /// ```
    fn into_nullable_field(self) -> Field;

    /// Convert the type to [`FieldRef`] with nullable type and "" name
    ///
    /// Concise wrapper around [`DataTypeExt::into_nullable_field`] that
    /// constructs a [`FieldRef`].
    ///
    /// ```
    /// # use datafusion_common::datatype::DataTypeExt;
    /// # use arrow::datatypes::DataType;
    /// let field_ref = DataType::Int32.into_nullable_field_ref();
    /// // result is a shared, nullable Int32 field with "" name
    /// assert_eq!(field_ref.name(), "");
    /// assert_eq!(field_ref.data_type(), &DataType::Int32);
    /// assert!(field_ref.is_nullable());
    /// ```
    fn into_nullable_field_ref(self) -> FieldRef;
}
52
53impl DataTypeExt for DataType {
54    fn into_nullable_field(self) -> Field {
55        Field::new("", self, true)
56    }
57
58    fn into_nullable_field_ref(self) -> FieldRef {
59        Arc::new(Field::new("", self, true))
60    }
61}
62
/// DataFusion extension methods for Arrow [`Field`] and [`FieldRef`]
pub trait FieldExt {
    /// Returns a new Field representing a List of this Field's DataType.
    ///
    /// For example if input represents an `Int32`, the return value will
    /// represent a `List<Int32>`.
    ///
    /// The implementations in this module return a nullable list field
    /// with an empty (`""`) name.
    ///
    /// Example:
    /// ```
    /// # use std::sync::Arc;
    /// # use arrow::datatypes::{DataType, Field};
    /// # use datafusion_common::datatype::FieldExt;
    /// // Int32 field
    /// let int_field = Field::new("my_int", DataType::Int32, true);
    /// // convert to a List field
    /// let list_field = int_field.into_list();
    /// // List<Int32>
    /// // Note that the item field name has been renamed to "item"
    /// assert_eq!(list_field.data_type(), &DataType::List(Arc::new(
    ///     Field::new("item", DataType::Int32, true)
    /// )));
    /// ```
    fn into_list(self) -> Self;

    /// Return a new Field representing this Field as the item type of a
    /// [`DataType::FixedSizeList`]
    ///
    /// For example if input represents an `Int32`, the return value will
    /// represent a `FixedSizeList<Int32, size>`.
    ///
    /// The implementations in this module return a nullable list field
    /// with an empty (`""`) name.
    ///
    /// Example:
    /// ```
    /// # use std::sync::Arc;
    /// # use arrow::datatypes::{DataType, Field};
    /// # use datafusion_common::datatype::FieldExt;
    /// // Int32 field
    /// let int_field = Field::new("my_int", DataType::Int32, true);
    /// // convert to a FixedSizeList field of size 3
    /// let fixed_size_list_field = int_field.into_fixed_size_list(3);
    /// // FixedSizeList<Int32, 3>
    /// // Note that the item field name has been renamed to "item"
    /// assert_eq!(
    ///   fixed_size_list_field.data_type(),
    ///   &DataType::FixedSizeList(Arc::new(
    ///    Field::new("item", DataType::Int32, true)),
    ///    3
    /// ));
    /// ```
    fn into_fixed_size_list(self, list_size: i32) -> Self;

    /// Update the field to have the default list field name ("item")
    ///
    /// Lists are allowed to have an arbitrarily named field; however, a name
    /// other than 'item' will cause it to fail an == check against a more
    /// idiomatically created list in arrow-rs which causes issues.
    ///
    /// For example, if input represents an `Int32` field named "my_int",
    /// the return value will represent an `Int32` field named "item".
    ///
    /// Example:
    /// ```
    /// # use arrow::datatypes::Field;
    /// # use datafusion_common::datatype::FieldExt;
    /// let my_field = Field::new("my_int", arrow::datatypes::DataType::Int32, true);
    /// let item_field = my_field.into_list_item();
    /// assert_eq!(item_field.name(), Field::LIST_FIELD_DEFAULT_NAME);
    /// assert_eq!(item_field.name(), "item");
    /// ```
    fn into_list_item(self) -> Self;
}
131
132impl FieldExt for Field {
133    fn into_list(self) -> Self {
134        DataType::List(Arc::new(self.into_list_item())).into_nullable_field()
135    }
136
137    fn into_fixed_size_list(self, list_size: i32) -> Self {
138        DataType::FixedSizeList(self.into_list_item().into(), list_size)
139            .into_nullable_field()
140    }
141
142    fn into_list_item(self) -> Self {
143        if self.name() != Field::LIST_FIELD_DEFAULT_NAME {
144            self.with_name(Field::LIST_FIELD_DEFAULT_NAME)
145        } else {
146            self
147        }
148    }
149}
150
151impl FieldExt for Arc<Field> {
152    fn into_list(self) -> Self {
153        DataType::List(self.into_list_item())
154            .into_nullable_field()
155            .into()
156    }
157
158    fn into_fixed_size_list(self, list_size: i32) -> Self {
159        DataType::FixedSizeList(self.into_list_item(), list_size)
160            .into_nullable_field()
161            .into()
162    }
163
164    fn into_list_item(self) -> Self {
165        if self.name() != Field::LIST_FIELD_DEFAULT_NAME {
166            Arc::unwrap_or_clone(self)
167                .with_name(Field::LIST_FIELD_DEFAULT_NAME)
168                .into()
169        } else {
170            self
171        }
172    }
173}