datafusion_common/
datatype.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
//! [`DataTypeExt`] and [`FieldExt`] extension traits for working with Arrow [`DataType`]s and [`Field`]s
19
20use crate::arrow::datatypes::{DataType, Field, FieldRef};
21use crate::metadata::FieldMetadata;
22use std::sync::Arc;
23
24/// DataFusion extension methods for Arrow [`DataType`]
25pub trait DataTypeExt {
26    /// Convert the type to field with nullable type and "" name
27    ///
28    /// This is used to track the places where we convert a [`DataType`]
29    /// into a nameless field to interact with an API that is
30    /// capable of representing an extension type and/or nullability.
31    ///
32    /// For example, it will convert a `DataType::Int32` into
33    /// `Field::new("", DataType::Int32, true)`.
34    ///
35    /// ```
36    /// # use datafusion_common::datatype::DataTypeExt;
37    /// # use arrow::datatypes::DataType;
38    /// let dt = DataType::Utf8;
39    /// let field = dt.into_nullable_field();
40    /// // result is a nullable Utf8 field with "" name
41    /// assert_eq!(field.name(), "");
42    /// assert_eq!(field.data_type(), &DataType::Utf8);
43    /// assert!(field.is_nullable());
44    /// ```
45    fn into_nullable_field(self) -> Field;
46
47    /// Convert the type to [`FieldRef`] with nullable type and "" name
48    ///
49    /// Concise wrapper around [`DataTypeExt::into_nullable_field`] that
50    /// constructs a [`FieldRef`].
51    fn into_nullable_field_ref(self) -> FieldRef;
52}
53
54impl DataTypeExt for DataType {
55    fn into_nullable_field(self) -> Field {
56        Field::new("", self, true)
57    }
58
59    fn into_nullable_field_ref(self) -> FieldRef {
60        Arc::new(Field::new("", self, true))
61    }
62}
63
64/// DataFusion extension methods for Arrow [`Field`] and [`FieldRef`]
65///
66/// This trait is implemented for both [`Field`] and [`FieldRef`] and
67/// provides convenience methods for efficiently working with both types.
68///
69/// For [`FieldRef`], the methods will attempt to unwrap the `Arc`
70/// to avoid unnecessary cloning when possible.
71pub trait FieldExt {
72    /// Ensure the field is named `new_name`, returning the given field if the
73    /// name matches, and a new field if not.
74    ///
75    /// This method avoids `clone`ing fields and names if the name is the same
76    /// as the field's existing name.
77    ///
78    /// Example:
79    /// ```
80    /// # use std::sync::Arc;
81    /// # use arrow::datatypes::{DataType, Field};
82    /// # use datafusion_common::datatype::FieldExt;
83    /// let int_field = Field::new("my_int", DataType::Int32, true);
84    /// // rename to "your_int"
85    /// let renamed_field = int_field.renamed("your_int");
86    /// assert_eq!(renamed_field.name(), "your_int");
87    /// ```
88    fn renamed(self, new_name: &str) -> Self;
89
90    /// Ensure the field has the given data type
91    ///
92    /// Note this is different than simply calling [`Field::with_data_type`] as
93    /// it avoids copying if the data type is already the same.
94    ///
95    /// Example:
96    /// ```
97    /// # use std::sync::Arc;
98    /// # use arrow::datatypes::{DataType, Field};
99    /// # use datafusion_common::datatype::FieldExt;
100    /// let int_field = Field::new("my_int", DataType::Int32, true);
101    /// // change to Float64
102    /// let retyped_field = int_field.retyped(DataType::Float64);
103    /// assert_eq!(retyped_field.data_type(), &DataType::Float64);
104    /// ```
105    fn retyped(self, new_data_type: DataType) -> Self;
106
107    /// Add field metadata to the Field
108    fn with_field_metadata(self, metadata: &FieldMetadata) -> Self;
109
110    /// Add optional field metadata,
111    fn with_field_metadata_opt(self, metadata: Option<&FieldMetadata>) -> Self;
112
113    /// Returns a new Field representing a List of this Field's DataType.
114    ///
115    /// For example if input represents an `Int32`, the return value will
116    /// represent a `List<Int32>`.
117    ///
118    /// Example:
119    /// ```
120    /// # use std::sync::Arc;
121    /// # use arrow::datatypes::{DataType, Field};
122    /// # use datafusion_common::datatype::FieldExt;
123    /// // Int32 field
124    /// let int_field = Field::new("my_int", DataType::Int32, true);
125    /// // convert to a List field
126    /// let list_field = int_field.into_list();
127    /// // List<Int32>
128    /// // Note that the item field name has been renamed to "item"
129    /// assert_eq!(list_field.data_type(), &DataType::List(Arc::new(
130    ///     Field::new("item", DataType::Int32, true)
131    /// )));
132    fn into_list(self) -> Self;
133
134    /// Return a new Field representing this Field as the item type of a
135    /// [`DataType::FixedSizeList`]
136    ///
137    /// For example if input represents an `Int32`, the return value will
138    /// represent a `FixedSizeList<Int32, size>`.
139    ///
140    /// Example:
141    /// ```
142    /// # use std::sync::Arc;
143    /// # use arrow::datatypes::{DataType, Field};
144    /// # use datafusion_common::datatype::FieldExt;
145    /// // Int32 field
146    /// let int_field = Field::new("my_int", DataType::Int32, true);
147    /// // convert to a FixedSizeList field of size 3
148    /// let fixed_size_list_field = int_field.into_fixed_size_list(3);
149    /// // FixedSizeList<Int32, 3>
150    /// // Note that the item field name has been renamed to "item"
151    /// assert_eq!(
152    ///   fixed_size_list_field.data_type(),
153    ///   &DataType::FixedSizeList(Arc::new(
154    ///    Field::new("item", DataType::Int32, true)),
155    ///    3
156    /// ));
157    fn into_fixed_size_list(self, list_size: i32) -> Self;
158
159    /// Update the field to have the default list field name ("item")
160    ///
161    /// Lists are allowed to have an arbitrarily named field; however, a name
162    /// other than 'item' will cause it to fail an == check against a more
163    /// idiomatically created list in arrow-rs which causes issues.
164    ///
165    /// For example, if input represents an `Int32` field named "my_int",
166    /// the return value will represent an `Int32` field named "item".
167    ///
168    /// Example:
169    /// ```
170    /// # use arrow::datatypes::Field;
171    /// # use datafusion_common::datatype::FieldExt;
172    /// let my_field = Field::new("my_int", arrow::datatypes::DataType::Int32, true);
173    /// let item_field = my_field.into_list_item();
174    /// assert_eq!(item_field.name(), Field::LIST_FIELD_DEFAULT_NAME);
175    /// assert_eq!(item_field.name(), "item");
176    /// ```
177    fn into_list_item(self) -> Self;
178}
179
180impl FieldExt for Field {
181    fn renamed(self, new_name: &str) -> Self {
182        // check if this is a new name before allocating a new Field / copying
183        // the existing one
184        if self.name() != new_name {
185            self.with_name(new_name)
186        } else {
187            self
188        }
189    }
190
191    fn retyped(self, new_data_type: DataType) -> Self {
192        self.with_data_type(new_data_type)
193    }
194
195    fn with_field_metadata(self, metadata: &FieldMetadata) -> Self {
196        metadata.add_to_field(self)
197    }
198
199    fn with_field_metadata_opt(self, metadata: Option<&FieldMetadata>) -> Self {
200        if let Some(metadata) = metadata {
201            self.with_field_metadata(metadata)
202        } else {
203            self
204        }
205    }
206
207    fn into_list(self) -> Self {
208        DataType::List(Arc::new(self.into_list_item())).into_nullable_field()
209    }
210
211    fn into_fixed_size_list(self, list_size: i32) -> Self {
212        DataType::FixedSizeList(self.into_list_item().into(), list_size)
213            .into_nullable_field()
214    }
215
216    fn into_list_item(self) -> Self {
217        if self.name() != Field::LIST_FIELD_DEFAULT_NAME {
218            self.with_name(Field::LIST_FIELD_DEFAULT_NAME)
219        } else {
220            self
221        }
222    }
223}
224
225impl FieldExt for Arc<Field> {
226    fn renamed(mut self, new_name: &str) -> Self {
227        if self.name() != new_name {
228            // avoid cloning if possible
229            Arc::make_mut(&mut self).set_name(new_name);
230        }
231        self
232    }
233
234    fn retyped(mut self, new_data_type: DataType) -> Self {
235        if self.data_type() != &new_data_type {
236            // avoid cloning if possible
237            Arc::make_mut(&mut self).set_data_type(new_data_type);
238        }
239        self
240    }
241
242    fn with_field_metadata(self, metadata: &FieldMetadata) -> Self {
243        metadata.add_to_field_ref(self)
244    }
245
246    fn with_field_metadata_opt(self, metadata: Option<&FieldMetadata>) -> Self {
247        if let Some(metadata) = metadata {
248            self.with_field_metadata(metadata)
249        } else {
250            self
251        }
252    }
253
254    fn into_list(self) -> Self {
255        DataType::List(self.into_list_item())
256            .into_nullable_field()
257            .into()
258    }
259
260    fn into_fixed_size_list(self, list_size: i32) -> Self {
261        DataType::FixedSizeList(self.into_list_item(), list_size)
262            .into_nullable_field()
263            .into()
264    }
265
266    fn into_list_item(mut self) -> Self {
267        if self.name() != Field::LIST_FIELD_DEFAULT_NAME {
268            // avoid cloning if possible
269            Arc::make_mut(&mut self).set_name(Field::LIST_FIELD_DEFAULT_NAME);
270        }
271        self
272    }
273}