1use crate::error::ArrowError;
19use std::cmp::Ordering;
20use std::collections::HashMap;
21use std::hash::{Hash, Hasher};
22use std::sync::Arc;
23
24use crate::datatype::DataType;
25#[cfg(feature = "canonical_extension_types")]
26use crate::extension::CanonicalExtensionType;
27use crate::schema::SchemaBuilder;
28use crate::{
29 Fields, UnionFields, UnionMode,
30 extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY, ExtensionType},
31};
32
/// A reference-counted, cheaply clonable handle to a [`Field`].
pub type FieldRef = Arc<Field>;
35
/// Describes a single named, typed value slot of a schema.
///
/// A field carries a name, a [`DataType`], a nullability flag and free-form
/// string metadata. `PartialEq`, `Ord` and `Hash` are implemented by hand in
/// this file and deliberately ignore `dict_id` and `dict_is_ordered`.
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Field {
    /// Name of the field.
    name: String,
    /// Type of the values described by this field.
    data_type: DataType,
    /// Nullability flag, reported by `is_nullable`.
    nullable: bool,
    /// Dictionary id; only reported by `dict_id()` for dictionary-typed fields.
    #[deprecated(
        since = "54.0.0",
        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
    )]
    dict_id: i64,
    /// Dictionary ordering flag; only reported by `dict_is_ordered()` for
    /// dictionary-typed fields.
    dict_is_ordered: bool,
    /// Arbitrary string key/value pairs attached to the field (also used to
    /// store extension type name and metadata).
    metadata: HashMap<String, String>,
}
62
63impl std::fmt::Debug for Field {
64 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65 #![expect(deprecated)] let Self {
67 name,
68 data_type,
69 nullable,
70 dict_id,
71 dict_is_ordered,
72 metadata,
73 } = self;
74
75 let mut s = f.debug_struct("Field");
76
77 if name != "item" {
78 s.field("name", name);
80 }
81
82 s.field("data_type", data_type);
83
84 if *nullable {
85 s.field("nullable", nullable);
86 }
87
88 if *dict_id != 0 {
89 s.field("dict_id", dict_id);
90 }
91
92 if *dict_is_ordered {
93 s.field("dict_is_ordered", dict_is_ordered);
94 }
95
96 if !metadata.is_empty() {
97 s.field("metadata", metadata);
98 }
99 s.finish()
100 }
101}
102
103impl PartialEq for Field {
109 fn eq(&self, other: &Self) -> bool {
110 self.name == other.name
111 && self.data_type == other.data_type
112 && self.nullable == other.nullable
113 && self.metadata == other.metadata
114 }
115}
116
// Marker impl: equality as defined by the `PartialEq` impl is treated as a
// full equivalence relation.
impl Eq for Field {}
118
/// Partial ordering that always defers to the total order given by `Ord`.
impl PartialOrd for Field {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
124
125impl Ord for Field {
126 fn cmp(&self, other: &Self) -> Ordering {
127 self.name
128 .cmp(other.name())
129 .then_with(|| self.data_type.cmp(other.data_type()))
130 .then_with(|| self.nullable.cmp(&other.nullable))
131 .then_with(|| {
132 let mut keys: Vec<&String> =
134 self.metadata.keys().chain(other.metadata.keys()).collect();
135 keys.sort();
136 for k in keys {
137 match (self.metadata.get(k), other.metadata.get(k)) {
138 (None, None) => {}
139 (Some(_), None) => {
140 return Ordering::Less;
141 }
142 (None, Some(_)) => {
143 return Ordering::Greater;
144 }
145 (Some(v1), Some(v2)) => match v1.cmp(v2) {
146 Ordering::Equal => {}
147 other => {
148 return other;
149 }
150 },
151 }
152 }
153
154 Ordering::Equal
155 })
156 }
157}
158
159impl Hash for Field {
160 fn hash<H: Hasher>(&self, state: &mut H) {
161 self.name.hash(state);
162 self.data_type.hash(state);
163 self.nullable.hash(state);
164
165 let mut keys: Vec<&String> = self.metadata.keys().collect();
167 keys.sort();
168 for k in keys {
169 k.hash(state);
170 self.metadata.get(k).expect("key valid").hash(state);
171 }
172 }
173}
174
/// Identity conversion, so APIs taking `impl AsRef<Field>` also accept a
/// plain `Field`.
impl AsRef<Field> for Field {
    fn as_ref(&self) -> &Field {
        self
    }
}
180
181impl Field {
/// Name given to the element field created by [`Field::new_list_field`].
pub const LIST_FIELD_DEFAULT_NAME: &'static str = "item";
184
185 pub fn new(name: impl Into<String>, data_type: DataType, nullable: bool) -> Self {
193 #[allow(deprecated)]
194 Field {
195 name: name.into(),
196 data_type,
197 nullable,
198 dict_id: 0,
199 dict_is_ordered: false,
200 metadata: HashMap::default(),
201 }
202 }
203
/// Creates a field named [`Field::LIST_FIELD_DEFAULT_NAME`] with the given
/// data type and nullability.
pub fn new_list_field(data_type: DataType, nullable: bool) -> Self {
    Self::new(Self::LIST_FIELD_DEFAULT_NAME, data_type, nullable)
}
221
222 #[deprecated(
224 since = "54.0.0",
225 note = "The ability to preserve dictionary IDs will be removed. With the dict_id field disappearing this function signature will change by removing the dict_id parameter."
226 )]
227 pub fn new_dict(
228 name: impl Into<String>,
229 data_type: DataType,
230 nullable: bool,
231 dict_id: i64,
232 dict_is_ordered: bool,
233 ) -> Self {
234 #[allow(deprecated)]
235 Field {
236 name: name.into(),
237 data_type,
238 nullable,
239 dict_id,
240 dict_is_ordered,
241 metadata: HashMap::default(),
242 }
243 }
244
/// Creates a field whose type is `DataType::Dictionary(key, value)`.
///
/// # Panics
///
/// Panics if `key.is_dictionary_key_type()` returns `false`.
pub fn new_dictionary(
    name: impl Into<String>,
    key: DataType,
    value: DataType,
    nullable: bool,
) -> Self {
    assert!(
        key.is_dictionary_key_type(),
        "{key} is not a valid dictionary key"
    );
    let data_type = DataType::Dictionary(Box::new(key), Box::new(value));
    Self::new(name, data_type, nullable)
}
265
/// Creates a field of type `DataType::Struct` holding the given child fields.
pub fn new_struct(name: impl Into<String>, fields: impl Into<Fields>, nullable: bool) -> Self {
    Self::new(name, DataType::Struct(fields.into()), nullable)
}
274
/// Creates a field of type `DataType::List` with `value` as its element field.
pub fn new_list(name: impl Into<String>, value: impl Into<FieldRef>, nullable: bool) -> Self {
    Self::new(name, DataType::List(value.into()), nullable)
}
283
/// Creates a field of type `DataType::LargeList` with `value` as its element
/// field.
pub fn new_large_list(
    name: impl Into<String>,
    value: impl Into<FieldRef>,
    nullable: bool,
) -> Self {
    Self::new(name, DataType::LargeList(value.into()), nullable)
}
296
/// Creates a field of type `DataType::FixedSizeList` with `value` as its
/// element field and `size` as the list size.
pub fn new_fixed_size_list(
    name: impl Into<String>,
    value: impl Into<FieldRef>,
    size: i32,
    nullable: bool,
) -> Self {
    Self::new(name, DataType::FixedSizeList(value.into(), size), nullable)
}
311
312 pub fn new_map(
321 name: impl Into<String>,
322 entries: impl Into<String>,
323 keys: impl Into<FieldRef>,
324 values: impl Into<FieldRef>,
325 sorted: bool,
326 nullable: bool,
327 ) -> Self {
328 let data_type = DataType::Map(
329 Arc::new(Field::new(
330 entries.into(),
331 DataType::Struct(Fields::from([keys.into(), values.into()])),
332 false, )),
334 sorted,
335 );
336 Self::new(name, data_type, nullable)
337 }
338
339 pub fn new_union<S, F, T>(name: S, type_ids: T, fields: F, mode: UnionMode) -> Self
353 where
354 S: Into<String>,
355 F: IntoIterator,
356 F::Item: Into<FieldRef>,
357 T: IntoIterator<Item = i8>,
358 {
359 Self::new(
360 name,
361 DataType::Union(
362 UnionFields::try_new(type_ids, fields).expect("Invalid UnionField"),
363 mode,
364 ),
365 false, )
367 }
368
/// Replaces the field's metadata with `metadata`.
#[inline]
pub fn set_metadata(&mut self, metadata: HashMap<String, String>) {
    self.metadata = metadata;
}
374
/// Builder-style variant of `set_metadata`: replaces the metadata and returns
/// the field.
pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
    self.set_metadata(metadata);
    self
}
380
/// Returns a shared reference to the field's metadata map.
#[inline]
pub const fn metadata(&self) -> &HashMap<String, String> {
    &self.metadata
}
386
/// Returns a mutable reference to the field's metadata map.
#[inline]
pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
    &mut self.metadata
}
392
/// Returns a reference to the field's name.
#[inline]
pub const fn name(&self) -> &String {
    &self.name
}
398
/// Replaces the field's name.
#[inline]
pub fn set_name(&mut self, name: impl Into<String>) {
    self.name = name.into();
}
404
/// Builder-style variant of `set_name`: replaces the name and returns the
/// field.
pub fn with_name(mut self, name: impl Into<String>) -> Self {
    self.set_name(name);
    self
}
418
/// Returns a reference to the field's [`DataType`].
#[inline]
pub const fn data_type(&self) -> &DataType {
    &self.data_type
}
424
/// Replaces the field's [`DataType`]. No other component of the field is
/// touched.
#[inline]
pub fn set_data_type(&mut self, data_type: DataType) {
    self.data_type = data_type;
}
438
/// Builder-style variant of `set_data_type`: replaces the data type and
/// returns the field.
pub fn with_data_type(mut self, data_type: DataType) -> Self {
    self.set_data_type(data_type);
    self
}
452
453 pub fn extension_type_name(&self) -> Option<&str> {
475 self.metadata()
476 .get(EXTENSION_TYPE_NAME_KEY)
477 .map(String::as_ref)
478 }
479
480 pub fn extension_type_metadata(&self) -> Option<&str> {
502 self.metadata()
503 .get(EXTENSION_TYPE_METADATA_KEY)
504 .map(String::as_ref)
505 }
506
/// Tries to build the extension type `E` from this field.
///
/// Delegates to `E::try_new_from_field_metadata`, passing this field's data
/// type and metadata.
///
/// # Errors
///
/// Returns whatever error `E::try_new_from_field_metadata` reports.
pub fn try_extension_type<E: ExtensionType>(&self) -> Result<E, ArrowError> {
    E::try_new_from_field_metadata(self.data_type(), self.metadata())
}
580
/// Panicking variant of `try_extension_type`.
///
/// # Panics
///
/// Panics with the error's `Display` output if `try_extension_type::<E>()`
/// fails.
pub fn extension_type<E: ExtensionType>(&self) -> E {
    self.try_extension_type::<E>()
        .unwrap_or_else(|e| panic!("{e}"))
}
592
593 pub fn try_with_extension_type<E: ExtensionType>(
606 &mut self,
607 extension_type: E,
608 ) -> Result<(), ArrowError> {
609 extension_type.supports_data_type(&self.data_type)?;
611
612 self.metadata
613 .insert(EXTENSION_TYPE_NAME_KEY.to_owned(), E::NAME.to_owned());
614 match extension_type.serialize_metadata() {
615 Some(metadata) => self
616 .metadata
617 .insert(EXTENSION_TYPE_METADATA_KEY.to_owned(), metadata),
618 None => self.metadata.remove(EXTENSION_TYPE_METADATA_KEY),
621 };
622
623 Ok(())
624 }
625
/// Builder-style, panicking variant of `try_with_extension_type`.
///
/// # Panics
///
/// Panics with the error's `Display` output if `try_with_extension_type`
/// fails.
pub fn with_extension_type<E: ExtensionType>(mut self, extension_type: E) -> Self {
    self.try_with_extension_type(extension_type)
        .unwrap_or_else(|e| panic!("{e}"));
    self
}
638
/// Tries to convert this field into a [`CanonicalExtensionType`] via its
/// `TryFrom<&Field>` implementation.
///
/// Only available with the `canonical_extension_types` feature.
#[cfg(feature = "canonical_extension_types")]
pub fn try_canonical_extension_type(&self) -> Result<CanonicalExtensionType, ArrowError> {
    CanonicalExtensionType::try_from(self)
}
651
/// Returns the field's nullability flag.
#[inline]
pub const fn is_nullable(&self) -> bool {
    self.nullable
}
659
/// Sets the field's nullability flag.
#[inline]
pub fn set_nullable(&mut self, nullable: bool) {
    self.nullable = nullable;
}
673
/// Builder-style variant of `set_nullable`: sets the flag and returns the
/// field.
pub fn with_nullable(mut self, nullable: bool) -> Self {
    self.set_nullable(nullable);
    self
}
687
688 pub(crate) fn fields(&self) -> Vec<&Field> {
691 let mut collected_fields = vec![self];
692 collected_fields.append(&mut Field::_fields(&self.data_type));
693
694 collected_fields
695 }
696
/// Collects, recursively, all fields nested inside the data type `dt`.
///
/// Data types without child fields yield an empty vector.
fn _fields(dt: &DataType) -> Vec<&Field> {
    match dt {
        DataType::Struct(fields) => fields.iter().flat_map(|f| f.fields()).collect(),
        // Union entries are `(type id, field)` pairs; only the field matters here.
        DataType::Union(fields, _) => fields.iter().flat_map(|(_, f)| f.fields()).collect(),
        DataType::List(field)
        | DataType::LargeList(field)
        | DataType::ListView(field)
        | DataType::LargeListView(field)
        | DataType::FixedSizeList(field, _)
        | DataType::Map(field, _) => field.fields(),
        // A dictionary stores a value *type*, not a field, so recurse on the
        // type directly.
        DataType::Dictionary(_, value_field) => Field::_fields(value_field.as_ref()),
        // Only the second component is visited; the first one is skipped.
        DataType::RunEndEncoded(_, field) => field.fields(),
        _ => vec![],
    }
}
712
713 #[inline]
716 #[deprecated(
717 since = "54.0.0",
718 note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
719 )]
720 pub(crate) fn fields_with_dict_id(&self, id: i64) -> Vec<&Field> {
721 self.fields()
722 .into_iter()
723 .filter(|&field| {
724 #[allow(deprecated)]
725 let matching_dict_id = field.dict_id == id;
726 matches!(field.data_type(), DataType::Dictionary(_, _)) && matching_dict_id
727 })
728 .collect()
729 }
730
/// Returns the dictionary id when the field has a dictionary data type, and
/// `None` for every other data type.
#[inline]
#[deprecated(
    since = "54.0.0",
    note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
)]
pub const fn dict_id(&self) -> Option<i64> {
    match self.data_type {
        #[allow(deprecated)]
        DataType::Dictionary(_, _) => Some(self.dict_id),
        _ => None,
    }
}
744
/// Returns the `dict_is_ordered` flag when the field has a dictionary data
/// type, and `None` for every other data type.
#[inline]
pub const fn dict_is_ordered(&self) -> Option<bool> {
    match self.data_type {
        DataType::Dictionary(_, _) => Some(self.dict_is_ordered),
        _ => None,
    }
}
766
767 pub fn with_dict_is_ordered(mut self, dict_is_ordered: bool) -> Self {
773 if matches!(self.data_type, DataType::Dictionary(_, _)) {
774 self.dict_is_ordered = dict_is_ordered;
775 };
776 self
777 }
778
779 pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowError> {
794 if from.dict_is_ordered != self.dict_is_ordered {
795 return Err(ArrowError::SchemaError(format!(
796 "Fail to merge schema field '{}' because from dict_is_ordered = {} does not match {}",
797 self.name, from.dict_is_ordered, self.dict_is_ordered
798 )));
799 }
800 match (self.metadata().is_empty(), from.metadata().is_empty()) {
802 (false, false) => {
803 let mut merged = self.metadata().clone();
804 for (key, from_value) in from.metadata() {
805 if let Some(self_value) = self.metadata.get(key) {
806 if self_value != from_value {
807 return Err(ArrowError::SchemaError(format!(
808 "Fail to merge field '{}' due to conflicting metadata data value for key {}.
809 From value = {} does not match {}", self.name, key, from_value, self_value),
810 ));
811 }
812 } else {
813 merged.insert(key.clone(), from_value.clone());
814 }
815 }
816 self.set_metadata(merged);
817 }
818 (true, false) => {
819 self.set_metadata(from.metadata().clone());
820 }
821 _ => {}
822 }
823 match &mut self.data_type {
824 DataType::Struct(nested_fields) => match &from.data_type {
825 DataType::Struct(from_nested_fields) => {
826 let mut builder = SchemaBuilder::new();
827 nested_fields
828 .iter()
829 .chain(from_nested_fields)
830 .try_for_each(|f| builder.try_merge(f))?;
831 *nested_fields = builder.finish().fields;
832 }
833 DataType::Null => {
834 self.nullable = true;
835 }
836 _ => {
837 return Err(ArrowError::SchemaError(format!(
838 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
839 self.name, from.data_type
840 )));
841 }
842 },
843 DataType::Union(nested_fields, _) => match &from.data_type {
844 DataType::Union(from_nested_fields, _) => {
845 nested_fields.try_merge(from_nested_fields)?
846 }
847 DataType::Null => {
848 self.nullable = true;
849 }
850 _ => {
851 return Err(ArrowError::SchemaError(format!(
852 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
853 self.name, from.data_type
854 )));
855 }
856 },
857 DataType::List(field) => match &from.data_type {
858 DataType::List(from_field) => {
859 let mut f = (**field).clone();
860 f.try_merge(from_field)?;
861 (*field) = Arc::new(f);
862 }
863 DataType::Null => {
864 self.nullable = true;
865 }
866 _ => {
867 return Err(ArrowError::SchemaError(format!(
868 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
869 self.name, from.data_type
870 )));
871 }
872 },
873 DataType::LargeList(field) => match &from.data_type {
874 DataType::LargeList(from_field) => {
875 let mut f = (**field).clone();
876 f.try_merge(from_field)?;
877 (*field) = Arc::new(f);
878 }
879 DataType::Null => {
880 self.nullable = true;
881 }
882 _ => {
883 return Err(ArrowError::SchemaError(format!(
884 "Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
885 self.name, from.data_type
886 )));
887 }
888 },
889 DataType::Null => {
890 self.nullable = true;
891 self.data_type = from.data_type.clone();
892 }
893 DataType::Boolean
894 | DataType::Int8
895 | DataType::Int16
896 | DataType::Int32
897 | DataType::Int64
898 | DataType::UInt8
899 | DataType::UInt16
900 | DataType::UInt32
901 | DataType::UInt64
902 | DataType::Float16
903 | DataType::Float32
904 | DataType::Float64
905 | DataType::Timestamp(_, _)
906 | DataType::Date32
907 | DataType::Date64
908 | DataType::Time32(_)
909 | DataType::Time64(_)
910 | DataType::Duration(_)
911 | DataType::Binary
912 | DataType::LargeBinary
913 | DataType::BinaryView
914 | DataType::Interval(_)
915 | DataType::LargeListView(_)
916 | DataType::ListView(_)
917 | DataType::Map(_, _)
918 | DataType::Dictionary(_, _)
919 | DataType::RunEndEncoded(_, _)
920 | DataType::FixedSizeList(_, _)
921 | DataType::FixedSizeBinary(_)
922 | DataType::Utf8
923 | DataType::LargeUtf8
924 | DataType::Utf8View
925 | DataType::Decimal32(_, _)
926 | DataType::Decimal64(_, _)
927 | DataType::Decimal128(_, _)
928 | DataType::Decimal256(_, _) => {
929 if from.data_type == DataType::Null {
930 self.nullable = true;
931 } else if self.data_type != from.data_type {
932 return Err(ArrowError::SchemaError(format!(
933 "Fail to merge schema field '{}' because the from data_type = {} does not equal {}",
934 self.name, from.data_type, self.data_type
935 )));
936 }
937 }
938 }
939 self.nullable |= from.nullable;
940
941 Ok(())
942 }
943
944 pub fn contains(&self, other: &Field) -> bool {
950 self.name == other.name
951 && self.data_type.contains(&other.data_type)
952 && self.dict_is_ordered == other.dict_is_ordered
953 && (self.nullable || !other.nullable)
955 && other.metadata.iter().all(|(k, v1)| {
957 self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default()
958 })
959 }
960
/// Returns an estimate, in bytes, of the memory used by this field.
///
/// The estimate is the size of the struct itself, with the inline size of
/// `data_type` replaced by `data_type.size()`, plus the capacity of the name,
/// plus one `(String, String)` slot per unit of metadata map capacity, plus
/// the capacities of all metadata keys and values.
pub fn size(&self) -> usize {
    // Subtract the inline `DataType` first, since `data_type.size()` is added
    // in its place on the next line.
    std::mem::size_of_val(self) - std::mem::size_of_val(&self.data_type)
        + self.data_type.size()
        + self.name.capacity()
        + (std::mem::size_of::<(String, String)>() * self.metadata.capacity())
        + self
            .metadata
            .iter()
            .map(|(k, v)| k.capacity() + v.capacity())
            .sum::<usize>()
}
975}
976
/// Single-line rendering of the form
/// `Field { "name": [nullable ]<data type>[, dict_id: N][, dict_is_ordered][, metadata: {..}] }`.
///
/// The bracketed parts are only emitted when the field is nullable, the
/// dictionary id is non-zero, `dict_is_ordered` is set, and the metadata is
/// non-empty, respectively.
impl std::fmt::Display for Field {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        #![expect(deprecated)] // `dict_id` is deprecated but is still printed when set
        let Self {
            name,
            data_type,
            nullable,
            dict_id,
            dict_is_ordered,
            metadata,
        } = self;
        let maybe_nullable = if *nullable { "nullable " } else { "" };
        let metadata_str = if metadata.is_empty() {
            String::new()
        } else {
            format!(", metadata: {metadata:?}")
        };
        let dict_id_str = if dict_id == &0 {
            String::new()
        } else {
            format!(", dict_id: {dict_id}")
        };
        let dict_is_ordered_str = if *dict_is_ordered {
            ", dict_is_ordered"
        } else {
            ""
        };
        // The name is written with `{:?}`, i.e. quoted and escaped.
        write!(
            f,
            "Field {{ {name:?}: {maybe_nullable}{data_type}{dict_id_str}{dict_is_ordered_str}{metadata_str} }}"
        )
    }
}
1010
1011#[cfg(test)]
1012mod test {
1013 use super::*;
1014 use std::collections::hash_map::DefaultHasher;
1015
/// `Field::new` accepts anything convertible into a `String` as the name.
#[test]
fn test_new_with_string() {
    let name = "c1";
    let _field = Field::new(name, DataType::Int64, false);
}
1022
/// `Field::new_dict` accepts anything convertible into a `String` as the name.
#[test]
fn test_new_dict_with_string() {
    let name = "c1";
    #[allow(deprecated)]
    let _field = Field::new_dict(name, DataType::Int64, false, 4, false);
}
1030
/// Pins the compact `Debug` output with inline snapshots.
#[test]
#[cfg_attr(miri, ignore)]
fn test_debug_format_field() {
    // The name "item" and `nullable: false` are both omitted from the output.
    insta::assert_debug_snapshot!(Field::new("item", DataType::UInt8, false), @r"
    Field {
        data_type: UInt8,
    }
    ");
    // A non-default name and `nullable: true` are printed.
    insta::assert_debug_snapshot!(Field::new("column", DataType::LargeUtf8, true), @r#"
    Field {
        name: "column",
        data_type: LargeUtf8,
        nullable: true,
    }
    "#);
}
1048
/// Merging two fields with different primitive types fails with a schema
/// error naming both types.
#[test]
fn test_merge_incompatible_types() {
    let mut target = Field::new("c1", DataType::Int64, false);
    let incoming = Field::new("c1", DataType::Float32, true);

    let message = target
        .try_merge(&incoming)
        .expect_err("should fail")
        .to_string();

    assert_eq!(
        "Schema error: Fail to merge schema field 'c1' because the from data_type = Float32 does not equal Int64",
        message
    );
}
1061
/// A `Null` field merges with any other type in either direction, and the
/// result is nullable.
#[test]
fn test_merge_with_null() {
    // Null <- Float32: the target adopts the incoming type.
    let mut null_target = Field::new("c1", DataType::Null, true);
    let float_source = Field::new("c1", DataType::Float32, false);
    null_target
        .try_merge(&float_source)
        .expect("should widen type to nullable float");
    assert_eq!(Field::new("c1", DataType::Float32, true), null_target);

    // Utf8 <- Null: the target keeps its type but becomes nullable.
    let mut utf8_target = Field::new("c2", DataType::Utf8, false);
    let null_source = Field::new("c2", DataType::Null, true);
    utf8_target
        .try_merge(&null_source)
        .expect("should widen type to nullable utf8");
    assert_eq!(Field::new("c2", DataType::Utf8, true), utf8_target);
}
1076
/// Merging nested types whose inner field is `Null` on the `from` side keeps
/// the inner type of the target and makes the inner field nullable.
#[test]
fn test_merge_with_nested_null() {
    // Struct case.
    let mut struct1 = Field::new(
        "s1",
        DataType::Struct(Fields::from(vec![Field::new(
            "inner",
            DataType::Float32,
            false,
        )])),
        false,
    );

    let struct2 = Field::new(
        "s2",
        DataType::Struct(Fields::from(vec![Field::new(
            "inner",
            DataType::Null,
            false,
        )])),
        true,
    );

    struct1
        .try_merge(&struct2)
        .expect("should widen inner field's type to nullable float");
    assert_eq!(
        Field::new(
            "s1",
            DataType::Struct(Fields::from(vec![Field::new(
                "inner",
                DataType::Float32,
                true,
            )])),
            true,
        ),
        struct1
    );

    // List case.
    let mut list1 = Field::new(
        "l1",
        DataType::List(Field::new("inner", DataType::Float32, false).into()),
        false,
    );

    let list2 = Field::new(
        "l2",
        DataType::List(Field::new("inner", DataType::Null, false).into()),
        true,
    );

    list1
        .try_merge(&list2)
        .expect("should widen inner field's type to nullable float");
    assert_eq!(
        Field::new(
            "l1",
            DataType::List(Field::new("inner", DataType::Float32, true).into()),
            true,
        ),
        list1
    );

    // LargeList case.
    let mut large_list1 = Field::new(
        "ll1",
        DataType::LargeList(Field::new("inner", DataType::Float32, false).into()),
        false,
    );

    let large_list2 = Field::new(
        "ll2",
        DataType::LargeList(Field::new("inner", DataType::Null, false).into()),
        true,
    );

    large_list1
        .try_merge(&large_list2)
        .expect("should widen inner field's type to nullable float");
    assert_eq!(
        Field::new(
            "ll1",
            DataType::LargeList(Field::new("inner", DataType::Float32, true).into()),
            true,
        ),
        large_list1
    );
}
1163
/// `fields_with_dict_id` only yields dictionary fields with the requested id,
/// however deeply they are nested.
#[test]
fn test_fields_with_dict_id() {
    #[allow(deprecated)]
    let dict1 = Field::new_dict(
        "dict1",
        DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
        false,
        10,
        false,
    );
    #[allow(deprecated)]
    let dict2 = Field::new_dict(
        "dict2",
        DataType::Dictionary(DataType::Int32.into(), DataType::Int8.into()),
        false,
        20,
        false,
    );

    // Struct -> list -> struct -> list -> struct nesting that contains
    // `dict1` twice and `dict2` once.
    let field = Field::new(
        "struct<dict1, list[struct<dict2, list[struct<dict1]>]>",
        DataType::Struct(Fields::from(vec![
            dict1.clone(),
            Field::new(
                "list[struct<dict1, list[struct<dict2>]>]",
                DataType::List(Arc::new(Field::new(
                    "struct<dict1, list[struct<dict2>]>",
                    DataType::Struct(Fields::from(vec![
                        dict1.clone(),
                        Field::new(
                            "list[struct<dict2>]",
                            DataType::List(Arc::new(Field::new(
                                "struct<dict2>",
                                DataType::Struct(vec![dict2.clone()].into()),
                                false,
                            ))),
                            false,
                        ),
                    ])),
                    false,
                ))),
                false,
            ),
        ])),
        false,
    );

    // NOTE(review): these loops pass vacuously if no field is returned; the
    // number of matches is not asserted.
    #[allow(deprecated)]
    for field in field.fields_with_dict_id(10) {
        assert_eq!(dict1, *field);
    }
    #[allow(deprecated)]
    for field in field.fields_with_dict_id(20) {
        assert_eq!(dict2, *field);
    }
}
1220
1221 fn get_field_hash(field: &Field) -> u64 {
1222 let mut s = DefaultHasher::new();
1223 field.hash(&mut s);
1224 s.finish()
1225 }
1226
/// Equality and hashing ignore the dictionary id but are sensitive to the
/// field name.
#[test]
fn test_field_comparison_case() {
    // Same name and type, different dictionary ids: equal, same hash.
    #[allow(deprecated)]
    let dict1 = Field::new_dict(
        "dict1",
        DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
        false,
        10,
        false,
    );
    #[allow(deprecated)]
    let dict2 = Field::new_dict(
        "dict1",
        DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
        false,
        20,
        false,
    );

    assert_eq!(dict1, dict2);
    assert_eq!(get_field_hash(&dict1), get_field_hash(&dict2));

    // Different name: not equal, different hash.
    #[allow(deprecated)]
    let dict1 = Field::new_dict(
        "dict0",
        DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
        false,
        10,
        false,
    );

    assert_ne!(dict1, dict2);
    assert_ne!(get_field_hash(&dict1), get_field_hash(&dict2));
}
1262
/// Fields that differ only in metadata are ordered by their metadata.
#[test]
fn test_field_comparison_metadata() {
    // Builds the same binary field "x" with two metadata entries.
    let with_metadata = |pairs: [(&str, &str); 2]| {
        let metadata: HashMap<String, String> = pairs
            .into_iter()
            .map(|(key, value)| (String::from(key), String::from(value)))
            .collect();
        Field::new("x", DataType::Binary, false).with_metadata(metadata)
    };

    let f1 = with_metadata([("k1", "v1"), ("k2", "v2")]);
    let f2 = with_metadata([("k1", "v1"), ("k3", "v3")]);
    let f3 = with_metadata([("k1", "v1"), ("k3", "v4")]);

    assert_eq!(f1.cmp(&f2), Ordering::Less);
    assert_eq!(f2.cmp(&f3), Ordering::Less);
    assert_eq!(f1.cmp(&f3), Ordering::Less);
}
1282
/// Compile-time check that `Field`, `Arc<Field>` and references to either
/// can be passed where `impl AsRef<Field>` is expected, both as single values
/// and as iterator items.
#[test]
#[expect(clippy::needless_borrows_for_generic_args)] // the borrows are the point of this test
fn test_field_as_ref() {
    let field = || Field::new("x", DataType::Binary, false);

    // Single values.
    fn accept_ref(_: impl AsRef<Field>) {}

    accept_ref(field());
    accept_ref(&field());
    accept_ref(&&field());
    accept_ref(Arc::new(field()));
    accept_ref(&Arc::new(field()));
    accept_ref(&&Arc::new(field()));

    // Collections.
    fn accept_refs(_: impl IntoIterator<Item: AsRef<Field>>) {}

    accept_refs(vec![field()]);
    accept_refs(vec![&field()]);
    accept_refs(vec![Arc::new(field())]);
    accept_refs(vec![&Arc::new(field())]);
    accept_refs(&vec![field()]);
    accept_refs(&vec![&field()]);
    accept_refs(&vec![Arc::new(field())]);
    accept_refs(&vec![&Arc::new(field())]);
}
1312
/// Every field contains itself.
#[test]
fn test_contains_reflexivity() {
    let metadata = HashMap::from([
        (String::from("k0"), String::from("v0")),
        (String::from("k1"), String::from("v1")),
    ]);
    let field = Field::new("field1", DataType::Float16, false).with_metadata(metadata);

    assert!(field.contains(&field));
}
1322
/// `contains` is transitive across successive merges, and is not symmetric.
#[test]
fn test_contains_transitivity() {
    let child_field = Field::new("child1", DataType::Float16, false);

    let mut field1 = Field::new(
        "field1",
        DataType::Struct(Fields::from(vec![child_field])),
        false,
    );
    field1.set_metadata(HashMap::from([(String::from("k1"), String::from("v1"))]));

    // field2 = its own metadata merged with field1.
    let mut field2 = Field::new("field1", DataType::Struct(Fields::default()), true);
    field2.set_metadata(HashMap::from([(String::from("k2"), String::from("v2"))]));
    field2.try_merge(&field1).unwrap();

    // field3 = its own metadata merged with field2 (and thereby field1).
    let mut field3 = Field::new("field1", DataType::Struct(Fields::default()), false);
    field3.set_metadata(HashMap::from([(String::from("k3"), String::from("v3"))]));
    field3.try_merge(&field2).unwrap();

    assert!(field2.contains(&field1));
    assert!(field3.contains(&field2));
    assert!(field3.contains(&field1));

    // The reverse direction must not hold.
    assert!(!field1.contains(&field2));
    assert!(!field1.contains(&field3));
    assert!(!field2.contains(&field3));
}
1350
/// A nullable field contains its non-nullable counterpart, not vice versa.
#[test]
fn test_contains_nullable() {
    let nullable = Field::new("field1", DataType::Boolean, true);
    let required = Field::new("field1", DataType::Boolean, false);

    assert!(nullable.contains(&required));
    assert!(!required.contains(&nullable));
}
1358
/// For nested types, `contains` requires both sides to have the same set of
/// children.
#[test]
fn test_contains_must_have_same_fields() {
    let child_field1 = Field::new("child1", DataType::Float16, false);
    let child_field2 = Field::new("child2", DataType::Float16, false);

    // Structs with a different number of children: neither contains the other.
    let field1 = Field::new(
        "field1",
        DataType::Struct(vec![child_field1.clone()].into()),
        true,
    );
    let field2 = Field::new(
        "field1",
        DataType::Struct(vec![child_field1, child_field2].into()),
        true,
    );

    assert!(!field1.contains(&field2));
    assert!(!field2.contains(&field1));

    // Unions with different type ids (`[1, 2]` vs `[1, 3]`): not contained.
    let field1 = Field::new(
        "field1",
        DataType::Union(
            UnionFields::try_new(
                vec![1, 2],
                vec![
                    Field::new("field1", DataType::UInt8, true),
                    Field::new("field3", DataType::Utf8, false),
                ],
            )
            .unwrap(),
            UnionMode::Dense,
        ),
        true,
    );
    let field2 = Field::new(
        "field1",
        DataType::Union(
            UnionFields::try_new(
                vec![1, 3],
                vec![
                    Field::new("field1", DataType::UInt8, false),
                    Field::new("field3", DataType::Utf8, false),
                ],
            )
            .unwrap(),
            UnionMode::Dense,
        ),
        true,
    );
    assert!(!field1.contains(&field2));

    // Unions with the same type ids, where only the child's nullability is
    // wider on the containing side: contained.
    let field1 = Field::new(
        "field1",
        DataType::Union(
            UnionFields::try_new(
                vec![1, 2],
                vec![
                    Field::new("field1", DataType::UInt8, true),
                    Field::new("field3", DataType::Utf8, false),
                ],
            )
            .unwrap(),
            UnionMode::Dense,
        ),
        true,
    );
    let field2 = Field::new(
        "field1",
        DataType::Union(
            UnionFields::try_new(
                vec![1, 2],
                vec![
                    Field::new("field1", DataType::UInt8, false),
                    Field::new("field3", DataType::Utf8, false),
                ],
            )
            .unwrap(),
            UnionMode::Dense,
        ),
        true,
    );
    assert!(field1.contains(&field2));
}
1444
/// Serializes `field` with postcard, deserializes it again and asserts that
/// the result equals the input.
#[cfg(feature = "serde")]
fn assert_binary_serde_round_trip(field: Field) {
    let bytes = postcard::to_stdvec(&field).unwrap();
    let restored: Field = postcard::from_bytes(&bytes).unwrap();
    assert_eq!(field, restored);
}
1451
/// Round-trips a field that never had metadata set.
#[cfg(feature = "serde")]
#[test]
fn test_field_without_metadata_serde() {
    let field = Field::new("name", DataType::Boolean, true);
    assert_binary_serde_round_trip(field)
}
1458
/// Round-trips a field whose metadata was explicitly set to an empty map.
#[cfg(feature = "serde")]
#[test]
fn test_field_with_empty_metadata_serde() {
    let field = Field::new("name", DataType::Boolean, false).with_metadata(HashMap::new());

    assert_binary_serde_round_trip(field)
}
1466
/// Round-trips a field carrying one metadata entry whose value is empty.
#[cfg(feature = "serde")]
#[test]
fn test_field_with_nonempty_metadata_serde() {
    let metadata = HashMap::from([("hi".to_owned(), "".to_owned())]);
    let field = Field::new("name", DataType::Boolean, false).with_metadata(metadata);

    assert_binary_serde_round_trip(field)
}
1476
/// Compound types (struct, list, large list, union) can be merged with a
/// `Null` field and keep their own data type.
#[test]
fn test_merge_compound_with_null() {
    // Struct + Null
    let mut field = Field::new(
        "s",
        DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)])),
        false,
    );
    field
        .try_merge(&Field::new("s", DataType::Null, true))
        .expect("Struct should merge with Null");
    assert!(field.is_nullable());
    assert!(matches!(field.data_type(), DataType::Struct(_)));

    // List + Null
    let mut field = Field::new(
        "l",
        DataType::List(Field::new("item", DataType::Utf8, false).into()),
        false,
    );
    field
        .try_merge(&Field::new("l", DataType::Null, true))
        .expect("List should merge with Null");
    assert!(field.is_nullable());
    assert!(matches!(field.data_type(), DataType::List(_)));

    // LargeList + Null
    let mut field = Field::new(
        "ll",
        DataType::LargeList(Field::new("item", DataType::Utf8, false).into()),
        false,
    );
    field
        .try_merge(&Field::new("ll", DataType::Null, true))
        .expect("LargeList should merge with Null");
    assert!(field.is_nullable());
    assert!(matches!(field.data_type(), DataType::LargeList(_)));

    // Union + Null
    // NOTE(review): unlike the three cases above, the nullability of the
    // merged union field is not asserted here — confirm whether that is
    // intentional.
    let mut field = Field::new(
        "u",
        DataType::Union(
            UnionFields::try_new(vec![0], vec![Field::new("f", DataType::Int32, false)])
                .unwrap(),
            UnionMode::Dense,
        ),
        false,
    );
    field
        .try_merge(&Field::new("u", DataType::Null, true))
        .expect("Union should merge with Null");
    assert!(matches!(field.data_type(), DataType::Union(_, _)));
}
1530}