iceberg_rust_spec/arrow/
schema.rs

/*!
 * Convert between Arrow (as used by DataFusion) and Iceberg schemas
 */
4
5use std::{collections::HashMap, convert::TryInto, ops::Deref, sync::Arc};
6
7use crate::{
8    spec::types::{PrimitiveType, StructField, StructType, Type},
9    types::ListType,
10};
11use arrow_schema::{DataType, Field, Fields, Schema as ArrowSchema, TimeUnit};
12
13use crate::error::Error;
14
/// Arrow field-metadata key under which a field's id is stored as a decimal string.
///
/// The conversions in this file write the id under this key when producing Arrow fields and
/// parse it back when reading Arrow fields.
pub const PARQUET_FIELD_ID_META_KEY: &str = "PARQUET:field_id";
16
17impl TryInto<ArrowSchema> for &StructType {
18    type Error = Error;
19
20    fn try_into(self) -> Result<ArrowSchema, Self::Error> {
21        let fields = self.try_into()?;
22        let metadata = HashMap::new();
23        Ok(ArrowSchema { fields, metadata })
24    }
25}
26
27impl TryInto<Fields> for &StructType {
28    type Error = Error;
29
30    fn try_into(self) -> Result<Fields, Self::Error> {
31        let fields = self
32            .iter()
33            .map(|field| {
34                Ok(Field::new(
35                    &field.name,
36                    (&field.field_type).try_into()?,
37                    !field.required,
38                )
39                .with_metadata(HashMap::from_iter(vec![(
40                    PARQUET_FIELD_ID_META_KEY.to_string(),
41                    field.id.to_string(),
42                )])))
43            })
44            .collect::<Result<_, Error>>()?;
45        Ok(fields)
46    }
47}
48
49impl TryFrom<&ArrowSchema> for StructType {
50    type Error = Error;
51
52    fn try_from(value: &ArrowSchema) -> Result<Self, Self::Error> {
53        value.fields().try_into()
54    }
55}
56
57impl TryFrom<&Fields> for StructType {
58    type Error = Error;
59
60    fn try_from(value: &Fields) -> Result<Self, Self::Error> {
61        let fields = value
62            .iter()
63            .map(|field| {
64                Ok(StructField {
65                    id: get_field_id(field)?,
66                    name: field.name().to_owned(),
67                    required: !field.is_nullable(),
68                    field_type: field.data_type().try_into()?,
69                    doc: None,
70                })
71            })
72            .collect::<Result<_, Error>>()?;
73        Ok(StructType::new(fields))
74    }
75}
76
77impl TryFrom<&Type> for DataType {
78    type Error = Error;
79
80    fn try_from(value: &Type) -> Result<Self, Self::Error> {
81        match value {
82            Type::Primitive(primitive) => match primitive {
83                PrimitiveType::Boolean => Ok(DataType::Boolean),
84                PrimitiveType::Int => Ok(DataType::Int32),
85                PrimitiveType::Long => Ok(DataType::Int64),
86                PrimitiveType::Float => Ok(DataType::Float32),
87                PrimitiveType::Double => Ok(DataType::Float64),
88                PrimitiveType::Decimal { precision, scale } => {
89                    Ok(DataType::Decimal128(*precision as u8, *scale as i8))
90                }
91                PrimitiveType::Date => Ok(DataType::Date32),
92                PrimitiveType::Time => Ok(DataType::Time64(TimeUnit::Microsecond)),
93                PrimitiveType::Timestamp => Ok(DataType::Timestamp(TimeUnit::Microsecond, None)),
94                PrimitiveType::Timestamptz => Ok(DataType::Timestamp(
95                    TimeUnit::Microsecond,
96                    Some(Arc::from("UTC")),
97                )),
98                PrimitiveType::String => Ok(DataType::Utf8),
99                PrimitiveType::Uuid => Ok(DataType::Utf8),
100                PrimitiveType::Fixed(len) => Ok(DataType::FixedSizeBinary(*len as i32)),
101                PrimitiveType::Binary => Ok(DataType::Binary),
102            },
103            Type::List(list) => Ok(DataType::List(Arc::new(
104                Field::new(
105                    "item",
106                    (&list.element as &Type).try_into()?,
107                    !list.element_required,
108                )
109                .with_metadata(HashMap::from_iter(vec![(
110                    PARQUET_FIELD_ID_META_KEY.to_string(),
111                    list.element_id.to_string(),
112                )])),
113            ))),
114            Type::Struct(struc) => Ok(DataType::Struct(struc.try_into()?)),
115            Type::Map(map) => Ok(DataType::Map(
116                Arc::new(Field::new(
117                    "entries",
118                    DataType::Struct(Fields::from(vec![
119                        Field::new("key", (&map.key as &Type).try_into()?, false).with_metadata(
120                            HashMap::from_iter(vec![(
121                                PARQUET_FIELD_ID_META_KEY.to_string(),
122                                map.key_id.to_string(),
123                            )]),
124                        ),
125                        Field::new(
126                            "value",
127                            (&map.value as &Type).try_into()?,
128                            !map.value_required,
129                        )
130                        .with_metadata(HashMap::from_iter(vec![(
131                            PARQUET_FIELD_ID_META_KEY.to_string(),
132                            map.value_id.to_string(),
133                        )])),
134                    ])),
135                    false,
136                )),
137                false,
138            )),
139        }
140    }
141}
142
143impl TryFrom<&DataType> for Type {
144    type Error = Error;
145
146    fn try_from(value: &DataType) -> Result<Self, Self::Error> {
147        match value {
148            DataType::Boolean => Ok(Type::Primitive(PrimitiveType::Boolean)),
149            DataType::Int8 | DataType::Int16 | DataType::Int32 => {
150                Ok(Type::Primitive(PrimitiveType::Int))
151            }
152            DataType::Int64 => Ok(Type::Primitive(PrimitiveType::Long)),
153            DataType::Float32 => Ok(Type::Primitive(PrimitiveType::Float)),
154            DataType::Float64 => Ok(Type::Primitive(PrimitiveType::Double)),
155            DataType::Decimal128(precision, scale) => Ok(Type::Primitive(PrimitiveType::Decimal {
156                precision: *precision as u32,
157                scale: *scale as u32,
158            })),
159            DataType::Date32 => Ok(Type::Primitive(PrimitiveType::Date)),
160            DataType::Time64(_) => Ok(Type::Primitive(PrimitiveType::Time)),
161            DataType::Timestamp(_, _) => Ok(Type::Primitive(PrimitiveType::Timestamp)),
162            DataType::Utf8 => Ok(Type::Primitive(PrimitiveType::String)),
163            DataType::Utf8View => Ok(Type::Primitive(PrimitiveType::String)),
164            DataType::FixedSizeBinary(len) => {
165                Ok(Type::Primitive(PrimitiveType::Fixed(*len as u64)))
166            }
167            DataType::Binary => Ok(Type::Primitive(PrimitiveType::Binary)),
168            DataType::Struct(fields) => Ok(Type::Struct(fields.try_into()?)),
169            DataType::List(field) => Ok(Type::List(ListType {
170                element_id: get_field_id(field)?,
171                element_required: !field.is_nullable(),
172                element: Box::new(field.data_type().try_into()?),
173            })),
174            x => Err(Error::NotSupported(format!(
175                "Arrow datatype {x} is not supported."
176            ))),
177        }
178    }
179}
180
181fn get_field_id(field: &Field) -> Result<i32, Error> {
182    field
183        .metadata()
184        .get(PARQUET_FIELD_ID_META_KEY)
185        .ok_or(Error::NotFound(format!(
186            "Parquet field id of field {field}"
187        )))
188        .and_then(|x| x.parse().map_err(Error::from))
189}
190
191pub fn new_fields_with_ids(fields: &Fields, index: &mut i32) -> Fields {
192    fields
193        .into_iter()
194        .map(|field| {
195            *index += 1;
196            match field.data_type() {
197                DataType::Struct(fields) => {
198                    let temp = *index;
199                    Field::new(
200                        field.name(),
201                        DataType::Struct(new_fields_with_ids(fields, index)),
202                        field.is_nullable(),
203                    )
204                    .with_metadata(HashMap::from_iter(vec![(
205                        PARQUET_FIELD_ID_META_KEY.to_string(),
206                        temp.to_string(),
207                    )]))
208                }
209                DataType::List(list_field) => {
210                    let temp = *index;
211                    *index += 1;
212                    Field::new(
213                        field.name(),
214                        DataType::List(Arc::new(list_field.deref().clone().with_metadata(
215                            HashMap::from_iter(vec![(
216                                PARQUET_FIELD_ID_META_KEY.to_string(),
217                                index.to_string(),
218                            )]),
219                        ))),
220                        field.is_nullable(),
221                    )
222                    .with_metadata(HashMap::from_iter(vec![(
223                        PARQUET_FIELD_ID_META_KEY.to_string(),
224                        temp.to_string(),
225                    )]))
226                }
227                _ => field
228                    .deref()
229                    .clone()
230                    .with_metadata(HashMap::from_iter(vec![(
231                        PARQUET_FIELD_ID_META_KEY.to_string(),
232                        index.to_string(),
233                    )])),
234            }
235        })
236        .collect()
237}
238
239#[cfg(test)]
240mod tests {
241    use super::*;
242    use crate::spec::types::MapType;
243
244    #[test]
245    fn test_struct_type_to_arrow_schema_simple() {
246        let struct_type = StructType::new(vec![
247            StructField::new(1, "field1", true, Type::Primitive(PrimitiveType::Int), None),
248            StructField::new(
249                2,
250                "field2",
251                false,
252                Type::Primitive(PrimitiveType::String),
253                None,
254            ),
255        ]);
256
257        let arrow_schema: ArrowSchema = (&struct_type).try_into().unwrap();
258
259        assert_eq!(arrow_schema.fields().len(), 2);
260        assert_eq!(arrow_schema.field(0).name(), "field1");
261        assert_eq!(get_field_id(arrow_schema.field(0)).unwrap(), 1);
262        assert_eq!(arrow_schema.field(0).data_type(), &DataType::Int32);
263        assert!(!arrow_schema.field(0).is_nullable());
264        assert_eq!(arrow_schema.field(1).name(), "field2");
265        assert_eq!(get_field_id(arrow_schema.field(1)).unwrap(), 2);
266        assert_eq!(arrow_schema.field(1).data_type(), &DataType::Utf8);
267        assert!(arrow_schema.field(1).is_nullable());
268    }
269
270    #[test]
271    fn test_struct_type_to_arrow_schema_nested() {
272        let nested_struct = StructType::new(vec![
273            StructField::new(
274                3,
275                "nested1",
276                true,
277                Type::Primitive(PrimitiveType::Long),
278                None,
279            ),
280            StructField::new(
281                4,
282                "nested2",
283                false,
284                Type::Primitive(PrimitiveType::Boolean),
285                None,
286            ),
287        ]);
288
289        let struct_type = StructType::new(vec![
290            StructField::new(1, "field1", true, Type::Primitive(PrimitiveType::Int), None),
291            StructField::new(2, "field2", false, Type::Struct(nested_struct), None),
292        ]);
293
294        let arrow_schema: ArrowSchema = (&struct_type).try_into().unwrap();
295
296        assert_eq!(arrow_schema.fields().len(), 2);
297        assert_eq!(arrow_schema.field(0).name(), "field1");
298        assert_eq!(get_field_id(arrow_schema.field(0)).unwrap(), 1);
299        assert_eq!(arrow_schema.field(0).data_type(), &DataType::Int32);
300        assert!(!arrow_schema.field(0).is_nullable());
301
302        let nested_field = arrow_schema.field(1);
303        assert_eq!(nested_field.name(), "field2");
304        assert_eq!(get_field_id(nested_field).unwrap(), 2);
305        assert!(nested_field.is_nullable());
306
307        if let DataType::Struct(nested_fields) = nested_field.data_type() {
308            assert_eq!(nested_fields.len(), 2);
309            assert_eq!(nested_fields[0].name(), "nested1");
310            assert_eq!(get_field_id(&nested_fields[0]).unwrap(), 3);
311            assert_eq!(nested_fields[0].data_type(), &DataType::Int64);
312            assert!(!nested_fields[0].is_nullable());
313            assert_eq!(nested_fields[1].name(), "nested2");
314            assert_eq!(get_field_id(&nested_fields[1]).unwrap(), 4);
315            assert_eq!(nested_fields[1].data_type(), &DataType::Boolean);
316            assert!(nested_fields[1].is_nullable());
317        } else {
318            panic!("Expected nested field to be a struct");
319        }
320    }
321
322    #[test]
323    fn test_struct_type_to_arrow_schema_list() {
324        let list_type = Type::List(ListType {
325            element_id: 3,
326            element_required: false,
327            element: Box::new(Type::Primitive(PrimitiveType::Double)),
328        });
329
330        let struct_type = StructType::new(vec![
331            StructField::new(1, "field1", true, Type::Primitive(PrimitiveType::Int), None),
332            StructField::new(2, "field2", false, list_type, None),
333        ]);
334
335        let arrow_schema: ArrowSchema = (&struct_type).try_into().unwrap();
336
337        assert_eq!(arrow_schema.fields().len(), 2);
338        assert_eq!(arrow_schema.field(0).name(), "field1");
339        assert_eq!(get_field_id(arrow_schema.field(0)).unwrap(), 1);
340        assert_eq!(arrow_schema.field(0).data_type(), &DataType::Int32);
341        assert!(!arrow_schema.field(0).is_nullable());
342
343        let list_field = arrow_schema.field(1);
344        assert_eq!(list_field.name(), "field2");
345        assert_eq!(get_field_id(list_field).unwrap(), 2);
346        assert!(list_field.is_nullable());
347
348        if let DataType::List(element_field) = list_field.data_type() {
349            assert_eq!(element_field.data_type(), &DataType::Float64);
350            assert_eq!(get_field_id(element_field).unwrap(), 3);
351            assert!(element_field.is_nullable());
352        } else {
353            panic!("Expected list field");
354        }
355    }
356
357    #[test]
358    fn test_struct_type_to_arrow_schema_map() {
359        let map_type = Type::Map(MapType {
360            key_id: 3,
361            value_id: 4,
362            value_required: false,
363            key: Box::new(Type::Primitive(PrimitiveType::String)),
364            value: Box::new(Type::Primitive(PrimitiveType::Int)),
365        });
366
367        let struct_type = StructType::new(vec![
368            StructField::new(1, "field1", true, Type::Primitive(PrimitiveType::Int), None),
369            StructField::new(2, "field2", false, map_type, None),
370        ]);
371
372        let arrow_schema: ArrowSchema = (&struct_type).try_into().unwrap();
373
374        assert_eq!(arrow_schema.fields().len(), 2);
375        assert_eq!(arrow_schema.field(0).name(), "field1");
376        assert_eq!(get_field_id(arrow_schema.field(0)).unwrap(), 1);
377        assert_eq!(arrow_schema.field(0).data_type(), &DataType::Int32);
378        assert!(!arrow_schema.field(0).is_nullable());
379
380        let map_field = arrow_schema.field(1);
381        assert_eq!(map_field.name(), "field2");
382        assert_eq!(get_field_id(map_field).unwrap(), 2);
383        assert!(map_field.is_nullable());
384
385        if let DataType::Map(entries_field, _) = map_field.data_type() {
386            if let DataType::Struct(entry_fields) = entries_field.data_type() {
387                assert_eq!(entry_fields.len(), 2);
388                assert_eq!(entry_fields[0].name(), "key");
389                assert_eq!(get_field_id(&entry_fields[0]).unwrap(), 3);
390                assert_eq!(entry_fields[0].data_type(), &DataType::Utf8);
391                assert!(!entry_fields[0].is_nullable());
392                assert_eq!(entry_fields[1].name(), "value");
393                assert_eq!(get_field_id(&entry_fields[1]).unwrap(), 4);
394                assert_eq!(entry_fields[1].data_type(), &DataType::Int32);
395                assert!(entry_fields[1].is_nullable());
396            } else {
397                panic!("Expected struct field for map entries");
398            }
399        } else {
400            panic!("Expected map field");
401        }
402    }
403
404    #[test]
405    fn test_struct_type_to_arrow_schema_complex() {
406        let nested_struct = StructType::new(vec![
407            StructField::new(
408                4,
409                "nested1",
410                true,
411                Type::Primitive(PrimitiveType::Long),
412                None,
413            ),
414            StructField::new(
415                5,
416                "nested2",
417                false,
418                Type::Primitive(PrimitiveType::Boolean),
419                None,
420            ),
421        ]);
422
423        let list_type = Type::List(ListType {
424            element_id: 3,
425            element_required: true,
426            element: Box::new(Type::Struct(nested_struct)),
427        });
428
429        let map_type = Type::Map(MapType {
430            key_id: 7,
431            value_id: 8,
432            value_required: false,
433            key: Box::new(Type::Primitive(PrimitiveType::String)),
434            value: Box::new(Type::Primitive(PrimitiveType::Date)),
435        });
436
437        let struct_type = StructType::new(vec![
438            StructField::new(1, "field1", true, Type::Primitive(PrimitiveType::Int), None),
439            StructField::new(2, "field2", false, list_type, None),
440            StructField::new(6, "field3", true, map_type, None),
441        ]);
442
443        let arrow_schema: ArrowSchema = (&struct_type).try_into().unwrap();
444
445        assert_eq!(arrow_schema.fields().len(), 3);
446        // Assertions for field1 (simple int)
447        assert_eq!(arrow_schema.field(0).name(), "field1");
448        assert_eq!(get_field_id(arrow_schema.field(0)).unwrap(), 1);
449        assert_eq!(arrow_schema.field(0).data_type(), &DataType::Int32);
450        assert!(!arrow_schema.field(0).is_nullable());
451
452        // Assertions for field2 (list of structs)
453        let list_field = arrow_schema.field(1);
454        assert_eq!(list_field.name(), "field2");
455        assert_eq!(get_field_id(list_field).unwrap(), 2);
456        assert!(list_field.is_nullable());
457        if let DataType::List(element_field) = list_field.data_type() {
458            if let DataType::Struct(nested_fields) = element_field.data_type() {
459                assert_eq!(nested_fields.len(), 2);
460                assert_eq!(nested_fields[0].name(), "nested1");
461                assert_eq!(get_field_id(&nested_fields[0]).unwrap(), 4);
462                assert_eq!(nested_fields[0].data_type(), &DataType::Int64);
463                assert!(!nested_fields[0].is_nullable());
464                assert_eq!(nested_fields[1].name(), "nested2");
465                assert_eq!(get_field_id(&nested_fields[1]).unwrap(), 5);
466                assert_eq!(nested_fields[1].data_type(), &DataType::Boolean);
467                assert!(nested_fields[1].is_nullable());
468            } else {
469                panic!("Expected struct as list element");
470            }
471        } else {
472            panic!("Expected list field");
473        }
474
475        // Assertions for field3 (map of string to list of structs)
476        let map_field = arrow_schema.field(2);
477        assert_eq!(map_field.name(), "field3");
478        assert_eq!(get_field_id(map_field).unwrap(), 6);
479        assert!(!map_field.is_nullable());
480        if let DataType::Map(entries_field, _) = map_field.data_type() {
481            if let DataType::Struct(entry_fields) = entries_field.data_type() {
482                assert_eq!(entry_fields.len(), 2);
483                assert_eq!(entry_fields[0].name(), "key");
484                assert_eq!(get_field_id(&entry_fields[0]).unwrap(), 7);
485                assert_eq!(entry_fields[0].data_type(), &DataType::Utf8);
486                assert!(!entry_fields[0].is_nullable());
487
488                // Check the value (list of structs)
489                assert_eq!(entry_fields[1].name(), "value");
490                assert_eq!(get_field_id(&entry_fields[1]).unwrap(), 8);
491                assert!(entry_fields[1].is_nullable());
492            } else {
493                panic!("Expected struct field for map entries");
494            }
495        } else {
496            panic!("Expected map field");
497        }
498    }
499
500    #[test]
501    fn test_struct_type_to_arrow_schema_empty() {
502        let struct_type = StructType::new(vec![]);
503        let arrow_schema: ArrowSchema = (&struct_type).try_into().unwrap();
504        assert_eq!(arrow_schema.fields().len(), 0);
505    }
506
507    #[test]
508    fn test_struct_type_to_arrow_schema_metadata() {
509        let struct_type = StructType::new(vec![StructField::new(
510            1,
511            "field1",
512            true,
513            Type::Primitive(PrimitiveType::Int),
514            None,
515        )]);
516
517        let arrow_schema: ArrowSchema = (&struct_type).try_into().unwrap();
518
519        // Check that the PARQUET:field_id metadata is set correctly
520        let field_metadata = arrow_schema.field(0).metadata();
521        assert_eq!(
522            field_metadata.get(PARQUET_FIELD_ID_META_KEY),
523            Some(&"1".to_string())
524        );
525    }
526
527    use arrow_schema::DataType;
528    use std::sync::Arc;
529
530    #[test]
531    fn test_arrow_schema_to_struct_type_simple() {
532        let arrow_schema = ArrowSchema::new(vec![
533            Field::new("field1", DataType::Int32, false).with_metadata(HashMap::from([(
534                PARQUET_FIELD_ID_META_KEY.to_string(),
535                "1".to_string(),
536            )])),
537            Field::new("field2", DataType::Utf8, true).with_metadata(HashMap::from([(
538                PARQUET_FIELD_ID_META_KEY.to_string(),
539                "2".to_string(),
540            )])),
541            Field::new("field3", DataType::Int16, true).with_metadata(HashMap::from([(
542                PARQUET_FIELD_ID_META_KEY.to_string(),
543                "3".to_string(),
544            )])),
545        ]);
546
547        let struct_type: StructType = (&arrow_schema).try_into().unwrap();
548
549        assert_eq!(struct_type[0].id, 1);
550        assert_eq!(struct_type[0].name, "field1");
551        assert!(struct_type[0].required);
552        assert_eq!(
553            struct_type[0].field_type,
554            Type::Primitive(PrimitiveType::Int)
555        );
556        assert_eq!(struct_type[1].id, 2);
557        assert_eq!(struct_type[1].name, "field2");
558        assert!(!struct_type[1].required);
559        assert_eq!(
560            struct_type[1].field_type,
561            Type::Primitive(PrimitiveType::String)
562        );
563        assert_eq!(struct_type[2].id, 3);
564        assert_eq!(struct_type[2].name, "field3");
565        assert!(!struct_type[2].required);
566        assert_eq!(
567            struct_type[2].field_type,
568            Type::Primitive(PrimitiveType::Int)
569        );
570    }
571
572    #[test]
573    fn test_arrow_schema_to_struct_type_nested() {
574        let nested_fields = Fields::from(vec![
575            Field::new("nested1", DataType::Int64, true).with_metadata(HashMap::from([(
576                PARQUET_FIELD_ID_META_KEY.to_string(),
577                "3".to_string(),
578            )])),
579            Field::new("nested2", DataType::Boolean, false).with_metadata(HashMap::from([(
580                PARQUET_FIELD_ID_META_KEY.to_string(),
581                "4".to_string(),
582            )])),
583        ]);
584
585        let arrow_schema = ArrowSchema::new(vec![
586            Field::new("field1", DataType::Int32, false).with_metadata(HashMap::from([(
587                PARQUET_FIELD_ID_META_KEY.to_string(),
588                "1".to_string(),
589            )])),
590            Field::new("field2", DataType::Struct(nested_fields), true).with_metadata(
591                HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "2".to_string())]),
592            ),
593        ]);
594
595        let struct_type: StructType = (&arrow_schema).try_into().unwrap();
596
597        assert_eq!(struct_type[0].id, 1);
598        assert_eq!(struct_type[0].name, "field1");
599        assert!(struct_type[0].required);
600        assert_eq!(
601            struct_type[0].field_type,
602            Type::Primitive(PrimitiveType::Int)
603        );
604
605        match &struct_type[1].field_type {
606            Type::Struct(nested_struct) => {
607                assert_eq!(nested_struct[0].id, 3);
608                assert_eq!(nested_struct[0].name, "nested1");
609                assert!(!nested_struct[0].required);
610                assert_eq!(
611                    nested_struct[0].field_type,
612                    Type::Primitive(PrimitiveType::Long)
613                );
614                assert_eq!(nested_struct[1].id, 4);
615                assert_eq!(nested_struct[1].name, "nested2");
616                assert!(nested_struct[1].required);
617                assert_eq!(
618                    nested_struct[1].field_type,
619                    Type::Primitive(PrimitiveType::Boolean)
620                );
621            }
622            _ => panic!("Expected nested struct"),
623        }
624    }
625
626    #[test]
627    fn test_arrow_schema_to_struct_type_list() {
628        let arrow_schema = ArrowSchema::new(vec![
629            Field::new("field1", DataType::Int32, false).with_metadata(HashMap::from([(
630                PARQUET_FIELD_ID_META_KEY.to_string(),
631                "1".to_string(),
632            )])),
633            Field::new(
634                "field2",
635                DataType::List(Arc::new(
636                    Field::new("item", DataType::Float64, true).with_metadata(HashMap::from([(
637                        PARQUET_FIELD_ID_META_KEY.to_string(),
638                        "3".to_string(),
639                    )])),
640                )),
641                true,
642            )
643            .with_metadata(HashMap::from([(
644                PARQUET_FIELD_ID_META_KEY.to_string(),
645                "2".to_string(),
646            )])),
647        ]);
648
649        let struct_type: StructType = (&arrow_schema).try_into().unwrap();
650
651        assert_eq!(struct_type[0].id, 1);
652        assert_eq!(struct_type[0].name, "field1");
653        assert!(struct_type[0].required);
654        assert_eq!(
655            struct_type[0].field_type,
656            Type::Primitive(PrimitiveType::Int)
657        );
658
659        match &struct_type[1].field_type {
660            Type::List(list_type) => {
661                assert_eq!(list_type.element_id, 3);
662                assert!(!list_type.element_required);
663                assert_eq!(*list_type.element, Type::Primitive(PrimitiveType::Double));
664            }
665            _ => panic!("Expected list type"),
666        }
667    }
668
669    // #[test]
670    // fn test_arrow_schema_to_struct_type_map() {
671    //     let arrow_schema = ArrowSchema::new(vec![
672    //         Field::new("field1", DataType::Int32, false).with_metadata(HashMap::from([(
673    //             PARQUET_FIELD_ID_META_KEY.to_string(),
674    //             "1".to_string(),
675    //         )])),
676    //         Field::new(
677    //             "field2",
678    //             DataType::Map(
679    //                 Arc::new(Field::new(
680    //                     "entries",
681    //                     DataType::Struct(Fields::from(vec![
682    //                         Field::new("key", DataType::Utf8, false).with_metadata(HashMap::from(
683    //                             [(PARQUET_FIELD_ID_META_KEY.to_string(), "3".to_string())],
684    //                         )),
685    //                         Field::new("value", DataType::Int32, true).with_metadata(
686    //                             HashMap::from([(
687    //                                 PARQUET_FIELD_ID_META_KEY.to_string(),
688    //                                 "4".to_string(),
689    //                             )]),
690    //                         ),
691    //                     ])),
692    //                     false,
693    //                 )),
694    //                 false,
695    //             ),
696    //             true,
697    //         )
698    //         .with_metadata(HashMap::from([(
699    //             PARQUET_FIELD_ID_META_KEY.to_string(),
700    //             "2".to_string(),
701    //         )])),
702    //     ]);
703
704    //     let struct_type: StructType = (&arrow_schema).try_into().unwrap();
705
706    //     assert_eq!(struct_type[0].id, 1);
707    //     assert_eq!(struct_type[0].name, "field1");
708    //     assert_eq!(struct_type[0].required, true);
709    //     assert_eq!(
710    //         struct_type[0].field_type,
711    //         Type::Primitive(PrimitiveType::Int)
712    //     );
713
714    //     match &struct_type[1].field_type {
715    //         Type::Map(map_type) => {
716    //             assert_eq!(map_type.key_id, 3);
717    //             assert_eq!(map_type.value_id, 4);
718    //             assert_eq!(map_type.value_required, false);
719    //             assert_eq!(*map_type.key, Type::Primitive(PrimitiveType::String));
720    //             assert_eq!(*map_type.value, Type::Primitive(PrimitiveType::Int));
721    //         }
722    //         _ => panic!("Expected map type"),
723    //     }
724    // }
725
726    // #[test]
727    // fn test_arrow_schema_to_struct_type_complex() {
728    //     let nested_fields = Fields::from(vec![
729    //         Field::new("nested1", DataType::Int64, true).with_metadata(HashMap::from([(
730    //             PARQUET_FIELD_ID_META_KEY.to_string(),
731    //             "4".to_string(),
732    //         )])),
733    //         Field::new("nested2", DataType::Boolean, false).with_metadata(HashMap::from([(
734    //             PARQUET_FIELD_ID_META_KEY.to_string(),
735    //             "5".to_string(),
736    //         )])),
737    //     ]);
738
739    //     let arrow_schema = ArrowSchema::new(vec![
740    //         Field::new("field1", DataType::Int32, false).with_metadata(HashMap::from([(
741    //             PARQUET_FIELD_ID_META_KEY.to_string(),
742    //             "1".to_string(),
743    //         )])),
744    //         Field::new(
745    //             "field2",
746    //             DataType::List(Arc::new(
747    //                 Field::new("item", DataType::Struct(nested_fields), false).with_metadata(
748    //                     HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "3".to_string())]),
749    //                 ),
750    //             )),
751    //             true,
752    //         )
753    //         .with_metadata(HashMap::from([(
754    //             PARQUET_FIELD_ID_META_KEY.to_string(),
755    //             "2".to_string(),
756    //         )])),
757    //         Field::new(
758    //             "field3",
759    //             DataType::Map(
760    //                 Arc::new(Field::new(
761    //                     "entries",
762    //                     DataType::Struct(Fields::from(vec![
763    //                         Field::new("key", DataType::Utf8, false).with_metadata(HashMap::from(
764    //                             [(PARQUET_FIELD_ID_META_KEY.to_string(), "7".to_string())],
765    //                         )),
766    //                         Field::new("value", DataType::Date32, true).with_metadata(
767    //                             HashMap::from([(
768    //                                 PARQUET_FIELD_ID_META_KEY.to_string(),
769    //                                 "8".to_string(),
770    //                             )]),
771    //                         ),
772    //                     ])),
773    //                     false,
774    //                 )),
775    //                 false,
776    //             ),
777    //             false,
778    //         )
779    //         .with_metadata(HashMap::from([(
780    //             PARQUET_FIELD_ID_META_KEY.to_string(),
781    //             "6".to_string(),
782    //         )])),
783    //     ]);
784
785    //     let struct_type: StructType = (&arrow_schema).try_into().unwrap();
786
787    //     // Check field1
788    //     assert_eq!(struct_type[0].id, 1);
789    //     assert_eq!(struct_type[0].name, "field1");
790    //     assert_eq!(struct_type[0].required, true);
791    //     assert_eq!(
792    //         struct_type[0].field_type,
793    //         Type::Primitive(PrimitiveType::Int)
794    //     );
795
796    //     // Check field2 (list of structs)
797    //     match &struct_type[1].field_type {
798    //         Type::List(list_type) => {
799    //             assert_eq!(list_type.element_id, 3);
800    //             assert_eq!(list_type.element_required, true);
801    //             match &*list_type.element {
802    //                 Type::Struct(nested_struct) => {
803    //                     assert_eq!(nested_struct[0].id, 4);
804    //                     assert_eq!(nested_struct[0].name, "nested1");
805    //                     assert_eq!(nested_struct[0].required, false);
806    //                     assert_eq!(
807    //                         nested_struct[0].field_type,
808    //                         Type::Primitive(PrimitiveType::Long)
809    //                     );
810    //                     assert_eq!(nested_struct[1].id, 5);
811    //                     assert_eq!(nested_struct[1].name, "nested2");
812    //                     assert_eq!(nested_struct[1].required, true);
813    //                     assert_eq!(
814    //                         nested_struct[1].field_type,
815    //                         Type::Primitive(PrimitiveType::Boolean)
816    //                     );
817    //                 }
818    //                 _ => panic!("Expected nested struct in list"),
819    //             }
820    //         }
821    //         _ => panic!("Expected list type"),
822    //     }
823
824    //     // Check field3 (map)
825    //     match &struct_type[2].field_type {
826    //         Type::Map(map_type) => {
827    //             assert_eq!(map_type.key_id, 7);
828    //             assert_eq!(map_type.value_id, 8);
829    //             assert_eq!(map_type.value_required, false);
830    //             assert_eq!(*map_type.key, Type::Primitive(PrimitiveType::String));
831    //             assert_eq!(*map_type.value, Type::Primitive(PrimitiveType::Date));
832    //         }
833    //         _ => panic!("Expected map type"),
834    //     }
835    // }
836
837    #[test]
838    fn test_arrow_schema_to_struct_type_missing_field_id() {
839        let arrow_schema = ArrowSchema::new(vec![Field::new("field1", DataType::Int32, false)]);
840
841        let result: Result<StructType, Error> = (&arrow_schema).try_into();
842        assert!(result.is_err());
843        assert!(matches!(result.unwrap_err(), Error::NotFound(_)));
844    }
845
846    #[test]
847    fn test_arrow_schema_to_struct_type_invalid_field_id() {
848        let arrow_schema = ArrowSchema::new(vec![Field::new("field1", DataType::Int32, false)
849            .with_metadata(HashMap::from([(
850                PARQUET_FIELD_ID_META_KEY.to_string(),
851                "invalid".to_string(),
852            )]))]);
853
854        let result: Result<StructType, Error> = (&arrow_schema).try_into();
855        assert!(result.is_err());
856    }
857
858    #[test]
859    fn test_arrow_schema_to_struct_type_unsupported_datatype() {
860        let arrow_schema = ArrowSchema::new(vec![Field::new("field1", DataType::UInt8, false)
861            .with_metadata(HashMap::from([(
862                PARQUET_FIELD_ID_META_KEY.to_string(),
863                "1".to_string(),
864            )]))]);
865
866        let result: Result<StructType, Error> = (&arrow_schema).try_into();
867        assert!(result.is_err());
868        assert!(matches!(result.unwrap_err(), Error::NotSupported(_)));
869    }
870
871    #[test]
872    fn test_nested_field_name() {
873        let schema = crate::schema::Schema::builder()
874            .with_schema_id(1)
875            .with_struct_field(StructField::new(
876                1,
877                "nested_object",
878                true,
879                Type::Struct(StructType::new(vec![
880                    StructField::new(
881                        2,
882                        "key1",
883                        true,
884                        Type::Primitive(PrimitiveType::String),
885                        None,
886                    ),
887                    StructField::new(3, "key2", true, Type::Primitive(PrimitiveType::Int), None),
888                ])),
889                None,
890            ))
891            .build()
892            .unwrap();
893
894        let field_name = schema.get_name("nested_object.key1");
895        assert!(field_name.is_some());
896    }
897}