1use std::{collections::HashMap, convert::TryInto, ops::Deref, sync::Arc};
6
7use crate::{
8 spec::types::{PrimitiveType, StructField, StructType, Type},
9 types::ListType,
10};
11use arrow_schema::{DataType, Field, Fields, Schema as ArrowSchema, TimeUnit};
12
13use crate::error::Error;
14
/// Metadata key under which a field's numeric id is stored on Arrow fields.
///
/// The conversions in this module write the id as a decimal string under this key and
/// parse it back from there.
pub const PARQUET_FIELD_ID_META_KEY: &str = "PARQUET:field_id";
16
17impl TryInto<ArrowSchema> for &StructType {
18 type Error = Error;
19
20 fn try_into(self) -> Result<ArrowSchema, Self::Error> {
21 let fields = self.try_into()?;
22 let metadata = HashMap::new();
23 Ok(ArrowSchema { fields, metadata })
24 }
25}
26
27impl TryInto<Fields> for &StructType {
28 type Error = Error;
29
30 fn try_into(self) -> Result<Fields, Self::Error> {
31 let fields = self
32 .iter()
33 .map(|field| {
34 Ok(Field::new(
35 &field.name,
36 (&field.field_type).try_into()?,
37 !field.required,
38 )
39 .with_metadata(HashMap::from_iter(vec![(
40 PARQUET_FIELD_ID_META_KEY.to_string(),
41 field.id.to_string(),
42 )])))
43 })
44 .collect::<Result<_, Error>>()?;
45 Ok(fields)
46 }
47}
48
49impl TryFrom<&ArrowSchema> for StructType {
50 type Error = Error;
51
52 fn try_from(value: &ArrowSchema) -> Result<Self, Self::Error> {
53 value.fields().try_into()
54 }
55}
56
57impl TryFrom<&Fields> for StructType {
58 type Error = Error;
59
60 fn try_from(value: &Fields) -> Result<Self, Self::Error> {
61 let fields = value
62 .iter()
63 .map(|field| {
64 Ok(StructField {
65 id: get_field_id(field)?,
66 name: field.name().to_owned(),
67 required: !field.is_nullable(),
68 field_type: field.data_type().try_into()?,
69 doc: None,
70 })
71 })
72 .collect::<Result<_, Error>>()?;
73 Ok(StructType::new(fields))
74 }
75}
76
77impl TryFrom<&Type> for DataType {
78 type Error = Error;
79
80 fn try_from(value: &Type) -> Result<Self, Self::Error> {
81 match value {
82 Type::Primitive(primitive) => match primitive {
83 PrimitiveType::Boolean => Ok(DataType::Boolean),
84 PrimitiveType::Int => Ok(DataType::Int32),
85 PrimitiveType::Long => Ok(DataType::Int64),
86 PrimitiveType::Float => Ok(DataType::Float32),
87 PrimitiveType::Double => Ok(DataType::Float64),
88 PrimitiveType::Decimal { precision, scale } => {
89 Ok(DataType::Decimal128(*precision as u8, *scale as i8))
90 }
91 PrimitiveType::Date => Ok(DataType::Date32),
92 PrimitiveType::Time => Ok(DataType::Time64(TimeUnit::Microsecond)),
93 PrimitiveType::Timestamp => Ok(DataType::Timestamp(TimeUnit::Microsecond, None)),
94 PrimitiveType::Timestamptz => Ok(DataType::Timestamp(
95 TimeUnit::Microsecond,
96 Some(Arc::from("UTC")),
97 )),
98 PrimitiveType::String => Ok(DataType::Utf8),
99 PrimitiveType::Uuid => Ok(DataType::Utf8),
100 PrimitiveType::Fixed(len) => Ok(DataType::FixedSizeBinary(*len as i32)),
101 PrimitiveType::Binary => Ok(DataType::Binary),
102 },
103 Type::List(list) => Ok(DataType::List(Arc::new(
104 Field::new(
105 "item",
106 (&list.element as &Type).try_into()?,
107 !list.element_required,
108 )
109 .with_metadata(HashMap::from_iter(vec![(
110 PARQUET_FIELD_ID_META_KEY.to_string(),
111 list.element_id.to_string(),
112 )])),
113 ))),
114 Type::Struct(struc) => Ok(DataType::Struct(struc.try_into()?)),
115 Type::Map(map) => Ok(DataType::Map(
116 Arc::new(Field::new(
117 "entries",
118 DataType::Struct(Fields::from(vec![
119 Field::new("key", (&map.key as &Type).try_into()?, false).with_metadata(
120 HashMap::from_iter(vec![(
121 PARQUET_FIELD_ID_META_KEY.to_string(),
122 map.key_id.to_string(),
123 )]),
124 ),
125 Field::new(
126 "value",
127 (&map.value as &Type).try_into()?,
128 !map.value_required,
129 )
130 .with_metadata(HashMap::from_iter(vec![(
131 PARQUET_FIELD_ID_META_KEY.to_string(),
132 map.value_id.to_string(),
133 )])),
134 ])),
135 false,
136 )),
137 false,
138 )),
139 }
140 }
141}
142
143impl TryFrom<&DataType> for Type {
144 type Error = Error;
145
146 fn try_from(value: &DataType) -> Result<Self, Self::Error> {
147 match value {
148 DataType::Boolean => Ok(Type::Primitive(PrimitiveType::Boolean)),
149 DataType::Int8 | DataType::Int16 | DataType::Int32 => {
150 Ok(Type::Primitive(PrimitiveType::Int))
151 }
152 DataType::Int64 => Ok(Type::Primitive(PrimitiveType::Long)),
153 DataType::Float32 => Ok(Type::Primitive(PrimitiveType::Float)),
154 DataType::Float64 => Ok(Type::Primitive(PrimitiveType::Double)),
155 DataType::Decimal128(precision, scale) => Ok(Type::Primitive(PrimitiveType::Decimal {
156 precision: *precision as u32,
157 scale: *scale as u32,
158 })),
159 DataType::Date32 => Ok(Type::Primitive(PrimitiveType::Date)),
160 DataType::Time64(_) => Ok(Type::Primitive(PrimitiveType::Time)),
161 DataType::Timestamp(_, _) => Ok(Type::Primitive(PrimitiveType::Timestamp)),
162 DataType::Utf8 => Ok(Type::Primitive(PrimitiveType::String)),
163 DataType::Utf8View => Ok(Type::Primitive(PrimitiveType::String)),
164 DataType::FixedSizeBinary(len) => {
165 Ok(Type::Primitive(PrimitiveType::Fixed(*len as u64)))
166 }
167 DataType::Binary => Ok(Type::Primitive(PrimitiveType::Binary)),
168 DataType::Struct(fields) => Ok(Type::Struct(fields.try_into()?)),
169 DataType::List(field) => Ok(Type::List(ListType {
170 element_id: get_field_id(field)?,
171 element_required: !field.is_nullable(),
172 element: Box::new(field.data_type().try_into()?),
173 })),
174 x => Err(Error::NotSupported(format!(
175 "Arrow datatype {x} is not supported."
176 ))),
177 }
178 }
179}
180
181fn get_field_id(field: &Field) -> Result<i32, Error> {
182 field
183 .metadata()
184 .get(PARQUET_FIELD_ID_META_KEY)
185 .ok_or(Error::NotFound(format!(
186 "Parquet field id of field {field}"
187 )))
188 .and_then(|x| x.parse().map_err(Error::from))
189}
190
191pub fn new_fields_with_ids(fields: &Fields, index: &mut i32) -> Fields {
192 fields
193 .into_iter()
194 .map(|field| {
195 *index += 1;
196 match field.data_type() {
197 DataType::Struct(fields) => {
198 let temp = *index;
199 Field::new(
200 field.name(),
201 DataType::Struct(new_fields_with_ids(fields, index)),
202 field.is_nullable(),
203 )
204 .with_metadata(HashMap::from_iter(vec![(
205 PARQUET_FIELD_ID_META_KEY.to_string(),
206 temp.to_string(),
207 )]))
208 }
209 DataType::List(list_field) => {
210 let temp = *index;
211 *index += 1;
212 Field::new(
213 field.name(),
214 DataType::List(Arc::new(list_field.deref().clone().with_metadata(
215 HashMap::from_iter(vec![(
216 PARQUET_FIELD_ID_META_KEY.to_string(),
217 index.to_string(),
218 )]),
219 ))),
220 field.is_nullable(),
221 )
222 .with_metadata(HashMap::from_iter(vec![(
223 PARQUET_FIELD_ID_META_KEY.to_string(),
224 temp.to_string(),
225 )]))
226 }
227 _ => field
228 .deref()
229 .clone()
230 .with_metadata(HashMap::from_iter(vec![(
231 PARQUET_FIELD_ID_META_KEY.to_string(),
232 index.to_string(),
233 )])),
234 }
235 })
236 .collect()
237}
238
239#[cfg(test)]
240mod tests {
241 use super::*;
242 use crate::spec::types::MapType;
243
244 #[test]
245 fn test_struct_type_to_arrow_schema_simple() {
246 let struct_type = StructType::new(vec![
247 StructField::new(1, "field1", true, Type::Primitive(PrimitiveType::Int), None),
248 StructField::new(
249 2,
250 "field2",
251 false,
252 Type::Primitive(PrimitiveType::String),
253 None,
254 ),
255 ]);
256
257 let arrow_schema: ArrowSchema = (&struct_type).try_into().unwrap();
258
259 assert_eq!(arrow_schema.fields().len(), 2);
260 assert_eq!(arrow_schema.field(0).name(), "field1");
261 assert_eq!(get_field_id(arrow_schema.field(0)).unwrap(), 1);
262 assert_eq!(arrow_schema.field(0).data_type(), &DataType::Int32);
263 assert!(!arrow_schema.field(0).is_nullable());
264 assert_eq!(arrow_schema.field(1).name(), "field2");
265 assert_eq!(get_field_id(arrow_schema.field(1)).unwrap(), 2);
266 assert_eq!(arrow_schema.field(1).data_type(), &DataType::Utf8);
267 assert!(arrow_schema.field(1).is_nullable());
268 }
269
270 #[test]
271 fn test_struct_type_to_arrow_schema_nested() {
272 let nested_struct = StructType::new(vec![
273 StructField::new(
274 3,
275 "nested1",
276 true,
277 Type::Primitive(PrimitiveType::Long),
278 None,
279 ),
280 StructField::new(
281 4,
282 "nested2",
283 false,
284 Type::Primitive(PrimitiveType::Boolean),
285 None,
286 ),
287 ]);
288
289 let struct_type = StructType::new(vec![
290 StructField::new(1, "field1", true, Type::Primitive(PrimitiveType::Int), None),
291 StructField::new(2, "field2", false, Type::Struct(nested_struct), None),
292 ]);
293
294 let arrow_schema: ArrowSchema = (&struct_type).try_into().unwrap();
295
296 assert_eq!(arrow_schema.fields().len(), 2);
297 assert_eq!(arrow_schema.field(0).name(), "field1");
298 assert_eq!(get_field_id(arrow_schema.field(0)).unwrap(), 1);
299 assert_eq!(arrow_schema.field(0).data_type(), &DataType::Int32);
300 assert!(!arrow_schema.field(0).is_nullable());
301
302 let nested_field = arrow_schema.field(1);
303 assert_eq!(nested_field.name(), "field2");
304 assert_eq!(get_field_id(nested_field).unwrap(), 2);
305 assert!(nested_field.is_nullable());
306
307 if let DataType::Struct(nested_fields) = nested_field.data_type() {
308 assert_eq!(nested_fields.len(), 2);
309 assert_eq!(nested_fields[0].name(), "nested1");
310 assert_eq!(get_field_id(&nested_fields[0]).unwrap(), 3);
311 assert_eq!(nested_fields[0].data_type(), &DataType::Int64);
312 assert!(!nested_fields[0].is_nullable());
313 assert_eq!(nested_fields[1].name(), "nested2");
314 assert_eq!(get_field_id(&nested_fields[1]).unwrap(), 4);
315 assert_eq!(nested_fields[1].data_type(), &DataType::Boolean);
316 assert!(nested_fields[1].is_nullable());
317 } else {
318 panic!("Expected nested field to be a struct");
319 }
320 }
321
322 #[test]
323 fn test_struct_type_to_arrow_schema_list() {
324 let list_type = Type::List(ListType {
325 element_id: 3,
326 element_required: false,
327 element: Box::new(Type::Primitive(PrimitiveType::Double)),
328 });
329
330 let struct_type = StructType::new(vec![
331 StructField::new(1, "field1", true, Type::Primitive(PrimitiveType::Int), None),
332 StructField::new(2, "field2", false, list_type, None),
333 ]);
334
335 let arrow_schema: ArrowSchema = (&struct_type).try_into().unwrap();
336
337 assert_eq!(arrow_schema.fields().len(), 2);
338 assert_eq!(arrow_schema.field(0).name(), "field1");
339 assert_eq!(get_field_id(arrow_schema.field(0)).unwrap(), 1);
340 assert_eq!(arrow_schema.field(0).data_type(), &DataType::Int32);
341 assert!(!arrow_schema.field(0).is_nullable());
342
343 let list_field = arrow_schema.field(1);
344 assert_eq!(list_field.name(), "field2");
345 assert_eq!(get_field_id(list_field).unwrap(), 2);
346 assert!(list_field.is_nullable());
347
348 if let DataType::List(element_field) = list_field.data_type() {
349 assert_eq!(element_field.data_type(), &DataType::Float64);
350 assert_eq!(get_field_id(element_field).unwrap(), 3);
351 assert!(element_field.is_nullable());
352 } else {
353 panic!("Expected list field");
354 }
355 }
356
357 #[test]
358 fn test_struct_type_to_arrow_schema_map() {
359 let map_type = Type::Map(MapType {
360 key_id: 3,
361 value_id: 4,
362 value_required: false,
363 key: Box::new(Type::Primitive(PrimitiveType::String)),
364 value: Box::new(Type::Primitive(PrimitiveType::Int)),
365 });
366
367 let struct_type = StructType::new(vec![
368 StructField::new(1, "field1", true, Type::Primitive(PrimitiveType::Int), None),
369 StructField::new(2, "field2", false, map_type, None),
370 ]);
371
372 let arrow_schema: ArrowSchema = (&struct_type).try_into().unwrap();
373
374 assert_eq!(arrow_schema.fields().len(), 2);
375 assert_eq!(arrow_schema.field(0).name(), "field1");
376 assert_eq!(get_field_id(arrow_schema.field(0)).unwrap(), 1);
377 assert_eq!(arrow_schema.field(0).data_type(), &DataType::Int32);
378 assert!(!arrow_schema.field(0).is_nullable());
379
380 let map_field = arrow_schema.field(1);
381 assert_eq!(map_field.name(), "field2");
382 assert_eq!(get_field_id(map_field).unwrap(), 2);
383 assert!(map_field.is_nullable());
384
385 if let DataType::Map(entries_field, _) = map_field.data_type() {
386 if let DataType::Struct(entry_fields) = entries_field.data_type() {
387 assert_eq!(entry_fields.len(), 2);
388 assert_eq!(entry_fields[0].name(), "key");
389 assert_eq!(get_field_id(&entry_fields[0]).unwrap(), 3);
390 assert_eq!(entry_fields[0].data_type(), &DataType::Utf8);
391 assert!(!entry_fields[0].is_nullable());
392 assert_eq!(entry_fields[1].name(), "value");
393 assert_eq!(get_field_id(&entry_fields[1]).unwrap(), 4);
394 assert_eq!(entry_fields[1].data_type(), &DataType::Int32);
395 assert!(entry_fields[1].is_nullable());
396 } else {
397 panic!("Expected struct field for map entries");
398 }
399 } else {
400 panic!("Expected map field");
401 }
402 }
403
404 #[test]
405 fn test_struct_type_to_arrow_schema_complex() {
406 let nested_struct = StructType::new(vec![
407 StructField::new(
408 4,
409 "nested1",
410 true,
411 Type::Primitive(PrimitiveType::Long),
412 None,
413 ),
414 StructField::new(
415 5,
416 "nested2",
417 false,
418 Type::Primitive(PrimitiveType::Boolean),
419 None,
420 ),
421 ]);
422
423 let list_type = Type::List(ListType {
424 element_id: 3,
425 element_required: true,
426 element: Box::new(Type::Struct(nested_struct)),
427 });
428
429 let map_type = Type::Map(MapType {
430 key_id: 7,
431 value_id: 8,
432 value_required: false,
433 key: Box::new(Type::Primitive(PrimitiveType::String)),
434 value: Box::new(Type::Primitive(PrimitiveType::Date)),
435 });
436
437 let struct_type = StructType::new(vec![
438 StructField::new(1, "field1", true, Type::Primitive(PrimitiveType::Int), None),
439 StructField::new(2, "field2", false, list_type, None),
440 StructField::new(6, "field3", true, map_type, None),
441 ]);
442
443 let arrow_schema: ArrowSchema = (&struct_type).try_into().unwrap();
444
445 assert_eq!(arrow_schema.fields().len(), 3);
446 assert_eq!(arrow_schema.field(0).name(), "field1");
448 assert_eq!(get_field_id(arrow_schema.field(0)).unwrap(), 1);
449 assert_eq!(arrow_schema.field(0).data_type(), &DataType::Int32);
450 assert!(!arrow_schema.field(0).is_nullable());
451
452 let list_field = arrow_schema.field(1);
454 assert_eq!(list_field.name(), "field2");
455 assert_eq!(get_field_id(list_field).unwrap(), 2);
456 assert!(list_field.is_nullable());
457 if let DataType::List(element_field) = list_field.data_type() {
458 if let DataType::Struct(nested_fields) = element_field.data_type() {
459 assert_eq!(nested_fields.len(), 2);
460 assert_eq!(nested_fields[0].name(), "nested1");
461 assert_eq!(get_field_id(&nested_fields[0]).unwrap(), 4);
462 assert_eq!(nested_fields[0].data_type(), &DataType::Int64);
463 assert!(!nested_fields[0].is_nullable());
464 assert_eq!(nested_fields[1].name(), "nested2");
465 assert_eq!(get_field_id(&nested_fields[1]).unwrap(), 5);
466 assert_eq!(nested_fields[1].data_type(), &DataType::Boolean);
467 assert!(nested_fields[1].is_nullable());
468 } else {
469 panic!("Expected struct as list element");
470 }
471 } else {
472 panic!("Expected list field");
473 }
474
475 let map_field = arrow_schema.field(2);
477 assert_eq!(map_field.name(), "field3");
478 assert_eq!(get_field_id(map_field).unwrap(), 6);
479 assert!(!map_field.is_nullable());
480 if let DataType::Map(entries_field, _) = map_field.data_type() {
481 if let DataType::Struct(entry_fields) = entries_field.data_type() {
482 assert_eq!(entry_fields.len(), 2);
483 assert_eq!(entry_fields[0].name(), "key");
484 assert_eq!(get_field_id(&entry_fields[0]).unwrap(), 7);
485 assert_eq!(entry_fields[0].data_type(), &DataType::Utf8);
486 assert!(!entry_fields[0].is_nullable());
487
488 assert_eq!(entry_fields[1].name(), "value");
490 assert_eq!(get_field_id(&entry_fields[1]).unwrap(), 8);
491 assert!(entry_fields[1].is_nullable());
492 } else {
493 panic!("Expected struct field for map entries");
494 }
495 } else {
496 panic!("Expected map field");
497 }
498 }
499
500 #[test]
501 fn test_struct_type_to_arrow_schema_empty() {
502 let struct_type = StructType::new(vec![]);
503 let arrow_schema: ArrowSchema = (&struct_type).try_into().unwrap();
504 assert_eq!(arrow_schema.fields().len(), 0);
505 }
506
507 #[test]
508 fn test_struct_type_to_arrow_schema_metadata() {
509 let struct_type = StructType::new(vec![StructField::new(
510 1,
511 "field1",
512 true,
513 Type::Primitive(PrimitiveType::Int),
514 None,
515 )]);
516
517 let arrow_schema: ArrowSchema = (&struct_type).try_into().unwrap();
518
519 let field_metadata = arrow_schema.field(0).metadata();
521 assert_eq!(
522 field_metadata.get(PARQUET_FIELD_ID_META_KEY),
523 Some(&"1".to_string())
524 );
525 }
526
527 use arrow_schema::DataType;
528 use std::sync::Arc;
529
530 #[test]
531 fn test_arrow_schema_to_struct_type_simple() {
532 let arrow_schema = ArrowSchema::new(vec![
533 Field::new("field1", DataType::Int32, false).with_metadata(HashMap::from([(
534 PARQUET_FIELD_ID_META_KEY.to_string(),
535 "1".to_string(),
536 )])),
537 Field::new("field2", DataType::Utf8, true).with_metadata(HashMap::from([(
538 PARQUET_FIELD_ID_META_KEY.to_string(),
539 "2".to_string(),
540 )])),
541 Field::new("field3", DataType::Int16, true).with_metadata(HashMap::from([(
542 PARQUET_FIELD_ID_META_KEY.to_string(),
543 "3".to_string(),
544 )])),
545 ]);
546
547 let struct_type: StructType = (&arrow_schema).try_into().unwrap();
548
549 assert_eq!(struct_type[0].id, 1);
550 assert_eq!(struct_type[0].name, "field1");
551 assert!(struct_type[0].required);
552 assert_eq!(
553 struct_type[0].field_type,
554 Type::Primitive(PrimitiveType::Int)
555 );
556 assert_eq!(struct_type[1].id, 2);
557 assert_eq!(struct_type[1].name, "field2");
558 assert!(!struct_type[1].required);
559 assert_eq!(
560 struct_type[1].field_type,
561 Type::Primitive(PrimitiveType::String)
562 );
563 assert_eq!(struct_type[2].id, 3);
564 assert_eq!(struct_type[2].name, "field3");
565 assert!(!struct_type[2].required);
566 assert_eq!(
567 struct_type[2].field_type,
568 Type::Primitive(PrimitiveType::Int)
569 );
570 }
571
572 #[test]
573 fn test_arrow_schema_to_struct_type_nested() {
574 let nested_fields = Fields::from(vec![
575 Field::new("nested1", DataType::Int64, true).with_metadata(HashMap::from([(
576 PARQUET_FIELD_ID_META_KEY.to_string(),
577 "3".to_string(),
578 )])),
579 Field::new("nested2", DataType::Boolean, false).with_metadata(HashMap::from([(
580 PARQUET_FIELD_ID_META_KEY.to_string(),
581 "4".to_string(),
582 )])),
583 ]);
584
585 let arrow_schema = ArrowSchema::new(vec![
586 Field::new("field1", DataType::Int32, false).with_metadata(HashMap::from([(
587 PARQUET_FIELD_ID_META_KEY.to_string(),
588 "1".to_string(),
589 )])),
590 Field::new("field2", DataType::Struct(nested_fields), true).with_metadata(
591 HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "2".to_string())]),
592 ),
593 ]);
594
595 let struct_type: StructType = (&arrow_schema).try_into().unwrap();
596
597 assert_eq!(struct_type[0].id, 1);
598 assert_eq!(struct_type[0].name, "field1");
599 assert!(struct_type[0].required);
600 assert_eq!(
601 struct_type[0].field_type,
602 Type::Primitive(PrimitiveType::Int)
603 );
604
605 match &struct_type[1].field_type {
606 Type::Struct(nested_struct) => {
607 assert_eq!(nested_struct[0].id, 3);
608 assert_eq!(nested_struct[0].name, "nested1");
609 assert!(!nested_struct[0].required);
610 assert_eq!(
611 nested_struct[0].field_type,
612 Type::Primitive(PrimitiveType::Long)
613 );
614 assert_eq!(nested_struct[1].id, 4);
615 assert_eq!(nested_struct[1].name, "nested2");
616 assert!(nested_struct[1].required);
617 assert_eq!(
618 nested_struct[1].field_type,
619 Type::Primitive(PrimitiveType::Boolean)
620 );
621 }
622 _ => panic!("Expected nested struct"),
623 }
624 }
625
626 #[test]
627 fn test_arrow_schema_to_struct_type_list() {
628 let arrow_schema = ArrowSchema::new(vec![
629 Field::new("field1", DataType::Int32, false).with_metadata(HashMap::from([(
630 PARQUET_FIELD_ID_META_KEY.to_string(),
631 "1".to_string(),
632 )])),
633 Field::new(
634 "field2",
635 DataType::List(Arc::new(
636 Field::new("item", DataType::Float64, true).with_metadata(HashMap::from([(
637 PARQUET_FIELD_ID_META_KEY.to_string(),
638 "3".to_string(),
639 )])),
640 )),
641 true,
642 )
643 .with_metadata(HashMap::from([(
644 PARQUET_FIELD_ID_META_KEY.to_string(),
645 "2".to_string(),
646 )])),
647 ]);
648
649 let struct_type: StructType = (&arrow_schema).try_into().unwrap();
650
651 assert_eq!(struct_type[0].id, 1);
652 assert_eq!(struct_type[0].name, "field1");
653 assert!(struct_type[0].required);
654 assert_eq!(
655 struct_type[0].field_type,
656 Type::Primitive(PrimitiveType::Int)
657 );
658
659 match &struct_type[1].field_type {
660 Type::List(list_type) => {
661 assert_eq!(list_type.element_id, 3);
662 assert!(!list_type.element_required);
663 assert_eq!(*list_type.element, Type::Primitive(PrimitiveType::Double));
664 }
665 _ => panic!("Expected list type"),
666 }
667 }
668
669 #[test]
838 fn test_arrow_schema_to_struct_type_missing_field_id() {
839 let arrow_schema = ArrowSchema::new(vec![Field::new("field1", DataType::Int32, false)]);
840
841 let result: Result<StructType, Error> = (&arrow_schema).try_into();
842 assert!(result.is_err());
843 assert!(matches!(result.unwrap_err(), Error::NotFound(_)));
844 }
845
846 #[test]
847 fn test_arrow_schema_to_struct_type_invalid_field_id() {
848 let arrow_schema = ArrowSchema::new(vec![Field::new("field1", DataType::Int32, false)
849 .with_metadata(HashMap::from([(
850 PARQUET_FIELD_ID_META_KEY.to_string(),
851 "invalid".to_string(),
852 )]))]);
853
854 let result: Result<StructType, Error> = (&arrow_schema).try_into();
855 assert!(result.is_err());
856 }
857
858 #[test]
859 fn test_arrow_schema_to_struct_type_unsupported_datatype() {
860 let arrow_schema = ArrowSchema::new(vec![Field::new("field1", DataType::UInt8, false)
861 .with_metadata(HashMap::from([(
862 PARQUET_FIELD_ID_META_KEY.to_string(),
863 "1".to_string(),
864 )]))]);
865
866 let result: Result<StructType, Error> = (&arrow_schema).try_into();
867 assert!(result.is_err());
868 assert!(matches!(result.unwrap_err(), Error::NotSupported(_)));
869 }
870
871 #[test]
872 fn test_nested_field_name() {
873 let schema = crate::schema::Schema::builder()
874 .with_schema_id(1)
875 .with_struct_field(StructField::new(
876 1,
877 "nested_object",
878 true,
879 Type::Struct(StructType::new(vec![
880 StructField::new(
881 2,
882 "key1",
883 true,
884 Type::Primitive(PrimitiveType::String),
885 None,
886 ),
887 StructField::new(3, "key2", true, Type::Primitive(PrimitiveType::Int), None),
888 ])),
889 None,
890 ))
891 .build()
892 .unwrap();
893
894 let field_name = schema.get_name("nested_object.key1");
895 assert!(field_name.is_some());
896 }
897}