datafusion_common/datatype.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! [`DataTypeExt`] and [`FieldExt`] extension traits for working with Arrow [`DataType`]s and [`Field`]s
19
20use crate::arrow::datatypes::{DataType, Field, FieldRef};
21use crate::metadata::FieldMetadata;
22use std::sync::Arc;
23
24/// DataFusion extension methods for Arrow [`DataType`]
25pub trait DataTypeExt {
26 /// Convert the type to field with nullable type and "" name
27 ///
28 /// This is used to track the places where we convert a [`DataType`]
29 /// into a nameless field to interact with an API that is
30 /// capable of representing an extension type and/or nullability.
31 ///
32 /// For example, it will convert a `DataType::Int32` into
33 /// `Field::new("", DataType::Int32, true)`.
34 ///
35 /// ```
36 /// # use datafusion_common::datatype::DataTypeExt;
37 /// # use arrow::datatypes::DataType;
38 /// let dt = DataType::Utf8;
39 /// let field = dt.into_nullable_field();
40 /// // result is a nullable Utf8 field with "" name
41 /// assert_eq!(field.name(), "");
42 /// assert_eq!(field.data_type(), &DataType::Utf8);
43 /// assert!(field.is_nullable());
44 /// ```
45 fn into_nullable_field(self) -> Field;
46
47 /// Convert the type to [`FieldRef`] with nullable type and "" name
48 ///
49 /// Concise wrapper around [`DataTypeExt::into_nullable_field`] that
50 /// constructs a [`FieldRef`].
51 fn into_nullable_field_ref(self) -> FieldRef;
52}
53
54impl DataTypeExt for DataType {
55 fn into_nullable_field(self) -> Field {
56 Field::new("", self, true)
57 }
58
59 fn into_nullable_field_ref(self) -> FieldRef {
60 Arc::new(Field::new("", self, true))
61 }
62}
63
64/// DataFusion extension methods for Arrow [`Field`] and [`FieldRef`]
65///
66/// This trait is implemented for both [`Field`] and [`FieldRef`] and
67/// provides convenience methods for efficiently working with both types.
68///
69/// For [`FieldRef`], the methods will attempt to unwrap the `Arc`
70/// to avoid unnecessary cloning when possible.
71pub trait FieldExt {
72 /// Ensure the field is named `new_name`, returning the given field if the
73 /// name matches, and a new field if not.
74 ///
75 /// This method avoids `clone`ing fields and names if the name is the same
76 /// as the field's existing name.
77 ///
78 /// Example:
79 /// ```
80 /// # use std::sync::Arc;
81 /// # use arrow::datatypes::{DataType, Field};
82 /// # use datafusion_common::datatype::FieldExt;
83 /// let int_field = Field::new("my_int", DataType::Int32, true);
84 /// // rename to "your_int"
85 /// let renamed_field = int_field.renamed("your_int");
86 /// assert_eq!(renamed_field.name(), "your_int");
87 /// ```
88 fn renamed(self, new_name: &str) -> Self;
89
90 /// Ensure the field has the given data type
91 ///
92 /// Note this is different than simply calling [`Field::with_data_type`] as
93 /// it avoids copying if the data type is already the same.
94 ///
95 /// Example:
96 /// ```
97 /// # use std::sync::Arc;
98 /// # use arrow::datatypes::{DataType, Field};
99 /// # use datafusion_common::datatype::FieldExt;
100 /// let int_field = Field::new("my_int", DataType::Int32, true);
101 /// // change to Float64
102 /// let retyped_field = int_field.retyped(DataType::Float64);
103 /// assert_eq!(retyped_field.data_type(), &DataType::Float64);
104 /// ```
105 fn retyped(self, new_data_type: DataType) -> Self;
106
107 /// Add field metadata to the Field
108 fn with_field_metadata(self, metadata: &FieldMetadata) -> Self;
109
110 /// Add optional field metadata,
111 fn with_field_metadata_opt(self, metadata: Option<&FieldMetadata>) -> Self;
112
113 /// Returns a new Field representing a List of this Field's DataType.
114 ///
115 /// For example if input represents an `Int32`, the return value will
116 /// represent a `List<Int32>`.
117 ///
118 /// Example:
119 /// ```
120 /// # use std::sync::Arc;
121 /// # use arrow::datatypes::{DataType, Field};
122 /// # use datafusion_common::datatype::FieldExt;
123 /// // Int32 field
124 /// let int_field = Field::new("my_int", DataType::Int32, true);
125 /// // convert to a List field
126 /// let list_field = int_field.into_list();
127 /// // List<Int32>
128 /// // Note that the item field name has been renamed to "item"
129 /// assert_eq!(list_field.data_type(), &DataType::List(Arc::new(
130 /// Field::new("item", DataType::Int32, true)
131 /// )));
132 fn into_list(self) -> Self;
133
134 /// Return a new Field representing this Field as the item type of a
135 /// [`DataType::FixedSizeList`]
136 ///
137 /// For example if input represents an `Int32`, the return value will
138 /// represent a `FixedSizeList<Int32, size>`.
139 ///
140 /// Example:
141 /// ```
142 /// # use std::sync::Arc;
143 /// # use arrow::datatypes::{DataType, Field};
144 /// # use datafusion_common::datatype::FieldExt;
145 /// // Int32 field
146 /// let int_field = Field::new("my_int", DataType::Int32, true);
147 /// // convert to a FixedSizeList field of size 3
148 /// let fixed_size_list_field = int_field.into_fixed_size_list(3);
149 /// // FixedSizeList<Int32, 3>
150 /// // Note that the item field name has been renamed to "item"
151 /// assert_eq!(
152 /// fixed_size_list_field.data_type(),
153 /// &DataType::FixedSizeList(Arc::new(
154 /// Field::new("item", DataType::Int32, true)),
155 /// 3
156 /// ));
157 fn into_fixed_size_list(self, list_size: i32) -> Self;
158
159 /// Update the field to have the default list field name ("item")
160 ///
161 /// Lists are allowed to have an arbitrarily named field; however, a name
162 /// other than 'item' will cause it to fail an == check against a more
163 /// idiomatically created list in arrow-rs which causes issues.
164 ///
165 /// For example, if input represents an `Int32` field named "my_int",
166 /// the return value will represent an `Int32` field named "item".
167 ///
168 /// Example:
169 /// ```
170 /// # use arrow::datatypes::Field;
171 /// # use datafusion_common::datatype::FieldExt;
172 /// let my_field = Field::new("my_int", arrow::datatypes::DataType::Int32, true);
173 /// let item_field = my_field.into_list_item();
174 /// assert_eq!(item_field.name(), Field::LIST_FIELD_DEFAULT_NAME);
175 /// assert_eq!(item_field.name(), "item");
176 /// ```
177 fn into_list_item(self) -> Self;
178}
179
180impl FieldExt for Field {
181 fn renamed(self, new_name: &str) -> Self {
182 // check if this is a new name before allocating a new Field / copying
183 // the existing one
184 if self.name() != new_name {
185 self.with_name(new_name)
186 } else {
187 self
188 }
189 }
190
191 fn retyped(self, new_data_type: DataType) -> Self {
192 self.with_data_type(new_data_type)
193 }
194
195 fn with_field_metadata(self, metadata: &FieldMetadata) -> Self {
196 metadata.add_to_field(self)
197 }
198
199 fn with_field_metadata_opt(self, metadata: Option<&FieldMetadata>) -> Self {
200 if let Some(metadata) = metadata {
201 self.with_field_metadata(metadata)
202 } else {
203 self
204 }
205 }
206
207 fn into_list(self) -> Self {
208 DataType::List(Arc::new(self.into_list_item())).into_nullable_field()
209 }
210
211 fn into_fixed_size_list(self, list_size: i32) -> Self {
212 DataType::FixedSizeList(self.into_list_item().into(), list_size)
213 .into_nullable_field()
214 }
215
216 fn into_list_item(self) -> Self {
217 if self.name() != Field::LIST_FIELD_DEFAULT_NAME {
218 self.with_name(Field::LIST_FIELD_DEFAULT_NAME)
219 } else {
220 self
221 }
222 }
223}
224
225impl FieldExt for Arc<Field> {
226 fn renamed(mut self, new_name: &str) -> Self {
227 if self.name() != new_name {
228 // avoid cloning if possible
229 Arc::make_mut(&mut self).set_name(new_name);
230 }
231 self
232 }
233
234 fn retyped(mut self, new_data_type: DataType) -> Self {
235 if self.data_type() != &new_data_type {
236 // avoid cloning if possible
237 Arc::make_mut(&mut self).set_data_type(new_data_type);
238 }
239 self
240 }
241
242 fn with_field_metadata(self, metadata: &FieldMetadata) -> Self {
243 metadata.add_to_field_ref(self)
244 }
245
246 fn with_field_metadata_opt(self, metadata: Option<&FieldMetadata>) -> Self {
247 if let Some(metadata) = metadata {
248 self.with_field_metadata(metadata)
249 } else {
250 self
251 }
252 }
253
254 fn into_list(self) -> Self {
255 DataType::List(self.into_list_item())
256 .into_nullable_field()
257 .into()
258 }
259
260 fn into_fixed_size_list(self, list_size: i32) -> Self {
261 DataType::FixedSizeList(self.into_list_item(), list_size)
262 .into_nullable_field()
263 .into()
264 }
265
266 fn into_list_item(mut self) -> Self {
267 if self.name() != Field::LIST_FIELD_DEFAULT_NAME {
268 // avoid cloning if possible
269 Arc::make_mut(&mut self).set_name(Field::LIST_FIELD_DEFAULT_NAME);
270 }
271 self
272 }
273}