use parquet2::schema::{
types::{
BasicTypeInfo, GroupConvertedType, LogicalType, ParquetType, PhysicalType,
PrimitiveConvertedType, TimeUnit as ParquetTimeUnit, TimestampType,
},
Repetition,
};
use crate::datatypes::{DataType, Field, IntervalUnit, TimeUnit};
pub fn parquet_to_arrow_schema(fields: &[ParquetType]) -> Vec<Field> {
fields.iter().filter_map(to_field).collect::<Vec<_>>()
}
/// Maps a parquet `INT32` column to an arrow [`DataType`].
///
/// A recognised logical type (`INTEGER`, `DECIMAL`, `DATE`, `TIME`) decides the
/// result on its own. Only when none of those is present is the converted type
/// consulted. Anything unrecognised is read as plain [`DataType::Int32`].
fn from_int32(
    logical_type: &Option<LogicalType>,
    converted_type: &Option<PrimitiveConvertedType>,
) -> DataType {
    // First pass: logical annotations take precedence over converted types.
    match logical_type {
        Some(LogicalType::INTEGER(int)) => {
            return match (int.is_signed, int.bit_width) {
                (true, 8) => DataType::Int8,
                (true, 16) => DataType::Int16,
                (true, 32) => DataType::Int32,
                (false, 8) => DataType::UInt8,
                (false, 16) => DataType::UInt16,
                (false, 32) => DataType::UInt32,
                // Any other declared width keeps the physical representation.
                _ => DataType::Int32,
            };
        }
        Some(LogicalType::DECIMAL(decimal)) => {
            return DataType::Decimal(decimal.precision as usize, decimal.scale as usize);
        }
        Some(LogicalType::DATE(_)) => return DataType::Date32,
        Some(LogicalType::TIME(time)) => {
            return match time.unit {
                ParquetTimeUnit::MILLIS(_) => DataType::Time32(TimeUnit::Millisecond),
                // Other units are not given a time type here.
                _ => DataType::Int32,
            };
        }
        _ => {}
    }

    // Second pass: fall back to the converted type.
    match converted_type {
        Some(PrimitiveConvertedType::Uint8) => DataType::UInt8,
        Some(PrimitiveConvertedType::Uint16) => DataType::UInt16,
        Some(PrimitiveConvertedType::Uint32) => DataType::UInt32,
        Some(PrimitiveConvertedType::Int8) => DataType::Int8,
        Some(PrimitiveConvertedType::Int16) => DataType::Int16,
        Some(PrimitiveConvertedType::Int32) => DataType::Int32,
        Some(PrimitiveConvertedType::Date) => DataType::Date32,
        Some(PrimitiveConvertedType::TimeMillis) => DataType::Time32(TimeUnit::Millisecond),
        Some(PrimitiveConvertedType::Decimal(precision, scale)) => {
            DataType::Decimal(*precision as usize, *scale as usize)
        }
        _ => DataType::Int32,
    }
}
/// Maps a parquet `INT64` column to an arrow [`DataType`].
///
/// Logical types are inspected first: a 64-bit `INTEGER`, `TIMESTAMP`, `TIME`
/// and `DECIMAL` each decide the result on their own. An `INTEGER` annotation
/// with any other bit width is not treated as a match and falls through to the
/// converted type, like every other unrecognised logical type. The final
/// fallback is plain [`DataType::Int64`].
fn from_int64(
    logical_type: &Option<LogicalType>,
    converted_type: &Option<PrimitiveConvertedType>,
) -> DataType {
    match logical_type {
        Some(LogicalType::INTEGER(int)) if int.bit_width == 64 => {
            return if int.is_signed {
                DataType::Int64
            } else {
                DataType::UInt64
            };
        }
        Some(LogicalType::TIMESTAMP(timestamp)) => {
            let unit = match &timestamp.unit {
                ParquetTimeUnit::MILLIS(_) => TimeUnit::Millisecond,
                ParquetTimeUnit::MICROS(_) => TimeUnit::Microsecond,
                ParquetTimeUnit::NANOS(_) => TimeUnit::Nanosecond,
            };
            // UTC-adjusted timestamps carry an explicit zero offset; others have no timezone.
            let timezone = if timestamp.is_adjusted_to_u_t_c {
                Some("+00:00".to_string())
            } else {
                None
            };
            return DataType::Timestamp(unit, timezone);
        }
        Some(LogicalType::TIME(time)) => {
            return match time.unit {
                ParquetTimeUnit::MICROS(_) => DataType::Time64(TimeUnit::Microsecond),
                ParquetTimeUnit::NANOS(_) => DataType::Time64(TimeUnit::Nanosecond),
                // Other units are not given a time type here.
                _ => DataType::Int64,
            };
        }
        Some(LogicalType::DECIMAL(decimal)) => {
            return DataType::Decimal(decimal.precision as usize, decimal.scale as usize);
        }
        _ => {}
    }

    // No decisive logical type: fall back to the converted type.
    match converted_type {
        Some(PrimitiveConvertedType::TimeMicros) => DataType::Time64(TimeUnit::Microsecond),
        Some(PrimitiveConvertedType::TimestampMillis) => {
            DataType::Timestamp(TimeUnit::Millisecond, None)
        }
        Some(PrimitiveConvertedType::TimestampMicros) => {
            DataType::Timestamp(TimeUnit::Microsecond, None)
        }
        Some(PrimitiveConvertedType::Int64) => DataType::Int64,
        Some(PrimitiveConvertedType::Uint64) => DataType::UInt64,
        Some(PrimitiveConvertedType::Decimal(precision, scale)) => {
            DataType::Decimal(*precision as usize, *scale as usize)
        }
        _ => DataType::Int64,
    }
}
/// Maps a parquet `BYTE_ARRAY` column to an arrow [`DataType`].
///
/// Only string annotations produce [`DataType::Utf8`]; everything else,
/// including `JSON`, `BSON` and `ENUM`, is read as [`DataType::Binary`].
/// A `STRING`, `JSON`, `BSON` or `ENUM` logical type takes precedence over the
/// converted type.
fn from_byte_array(
    logical_type: &Option<LogicalType>,
    converted_type: &Option<PrimitiveConvertedType>,
) -> DataType {
    match logical_type {
        Some(LogicalType::STRING(_)) => DataType::Utf8,
        Some(LogicalType::JSON(_)) | Some(LogicalType::BSON(_)) | Some(LogicalType::ENUM(_)) => {
            DataType::Binary
        }
        // Missing or unrecognised logical type: defer to the converted type.
        _ => match converted_type {
            Some(PrimitiveConvertedType::Utf8) => DataType::Utf8,
            _ => DataType::Binary,
        },
    }
}
/// Maps a parquet `FIXED_LEN_BYTE_ARRAY` column of `length` bytes to an arrow
/// [`DataType`].
///
/// A `DECIMAL` logical type always yields [`DataType::Decimal`]. The converted
/// type is only consulted when no logical type is present at all, where
/// `Decimal` and `Interval` are recognised. Every other combination becomes
/// [`DataType::FixedSizeBinary`] of the declared length.
fn from_fixed_len_byte_array(
    length: &i32,
    logical_type: &Option<LogicalType>,
    converted_type: &Option<PrimitiveConvertedType>,
) -> DataType {
    let byte_width = *length as usize;
    match logical_type {
        Some(LogicalType::DECIMAL(decimal)) => {
            DataType::Decimal(decimal.precision as usize, decimal.scale as usize)
        }
        None => match converted_type {
            Some(PrimitiveConvertedType::Decimal(precision, scale)) => {
                DataType::Decimal(*precision as usize, *scale as usize)
            }
            Some(PrimitiveConvertedType::Interval) => DataType::Interval(IntervalUnit::DayTime),
            _ => DataType::FixedSizeBinary(byte_width),
        },
        // Any other logical type leaves the column as opaque fixed-size bytes.
        Some(_) => DataType::FixedSizeBinary(byte_width),
    }
}
/// Maps a parquet primitive column to an arrow [`DataType`], ignoring its
/// repetition.
///
/// Physical types with a single arrow counterpart are mapped directly; the
/// remaining ones are delegated to the per-physical-type helpers, which inspect
/// the logical and converted type annotations.
fn to_primitive_type_inner(
    physical_type: &PhysicalType,
    logical_type: &Option<LogicalType>,
    converted_type: &Option<PrimitiveConvertedType>,
) -> DataType {
    match physical_type {
        // Fixed mappings that do not depend on any annotation.
        PhysicalType::Boolean => DataType::Boolean,
        PhysicalType::Float => DataType::Float32,
        PhysicalType::Double => DataType::Float64,
        // INT96 is read as a nanosecond timestamp without timezone.
        PhysicalType::Int96 => DataType::Timestamp(TimeUnit::Nanosecond, None),
        // Annotation-dependent mappings.
        PhysicalType::Int32 => from_int32(logical_type, converted_type),
        PhysicalType::Int64 => from_int64(logical_type, converted_type),
        PhysicalType::ByteArray => from_byte_array(logical_type, converted_type),
        PhysicalType::FixedLenByteArray(byte_width) => {
            from_fixed_len_byte_array(byte_width, logical_type, converted_type)
        }
    }
}
/// Maps a parquet primitive column to an arrow [`DataType`], taking its
/// repetition into account.
///
/// A `REPEATED` primitive becomes a [`DataType::List`] whose item field reuses
/// the column's own name and nullability; any other repetition yields the
/// plain primitive type.
fn to_primitive_type(
    basic_info: &BasicTypeInfo,
    physical_type: &PhysicalType,
    logical_type: &Option<LogicalType>,
    converted_type: &Option<PrimitiveConvertedType>,
) -> DataType {
    let data_type = to_primitive_type_inner(physical_type, logical_type, converted_type);
    if basic_info.repetition() != &Repetition::Repeated {
        return data_type;
    }

    let item = Field::new(basic_info.name(), data_type, is_nullable(basic_info));
    DataType::List(Box::new(item))
}
/// Converts a non-repeated parquet group into an arrow [`DataType`].
///
/// The group is read as a list when it carries the `LIST` logical type, or when
/// it has no logical type and its converted type is `List`. Every other group
/// is read as a struct. `parent_name` is forwarded to the list conversion.
fn non_repeated_group(
    logical_type: &Option<LogicalType>,
    converted_type: &Option<GroupConvertedType>,
    fields: &[ParquetType],
    parent_name: &str,
) -> Option<DataType> {
    debug_assert!(!fields.is_empty());
    let annotated_as_list = matches!(logical_type, Some(LogicalType::LIST(_)))
        || (logical_type.is_none() && matches!(converted_type, Some(GroupConvertedType::List)));

    if annotated_as_list {
        to_list(fields, parent_name)
    } else {
        to_struct(fields)
    }
}
/// Converts the children of a parquet group into an arrow
/// [`DataType::Struct`].
///
/// Children that cannot be converted are dropped. Returns `None` when no child
/// survives, so no struct without fields is ever produced.
fn to_struct(fields: &[ParquetType]) -> Option<DataType> {
    let children: Vec<Field> = fields.iter().filter_map(to_field).collect();
    if children.is_empty() {
        return None;
    }
    Some(DataType::Struct(children))
}
/// Converts a parquet group into an arrow [`DataType`].
///
/// A `REPEATED` group becomes a list of structs whose item field reuses the
/// group's own name and nullability; its annotations are not consulted.
/// Any other repetition is delegated to `non_repeated_group`.
/// Returns `None` when the group's children yield no convertible field.
fn to_group_type(
    basic_info: &BasicTypeInfo,
    logical_type: &Option<LogicalType>,
    converted_type: &Option<GroupConvertedType>,
    fields: &[ParquetType],
    parent_name: &str,
) -> Option<DataType> {
    debug_assert!(!fields.is_empty());
    if basic_info.repetition() != &Repetition::Repeated {
        return non_repeated_group(logical_type, converted_type, fields, parent_name);
    }

    let struct_type = to_struct(fields)?;
    let item = Field::new(basic_info.name(), struct_type, is_nullable(basic_info));
    Some(DataType::List(Box::new(item)))
}
/// Returns whether a parquet type maps to a nullable arrow field.
///
/// `OPTIONAL` and `REPEATED` types are nullable; only `REQUIRED` types are not.
pub(crate) fn is_nullable(basic_info: &BasicTypeInfo) -> bool {
    basic_info.repetition() != &Repetition::Required
}
/// Converts a parquet type into an arrow [`Field`] with the same name.
///
/// Nullability follows the parquet repetition (see `is_nullable`). Returns
/// `None` when the type has no arrow data type.
fn to_field(type_: &ParquetType) -> Option<Field> {
    let basic_info = type_.get_basic_info();
    to_data_type(type_)
        .map(|data_type| Field::new(basic_info.name(), data_type, is_nullable(basic_info)))
}
/// Converts the children of a `LIST`-annotated parquet group into an arrow
/// [`DataType::List`].
///
/// Only the first child (the repeated field) is inspected:
///
/// * a primitive child is the item itself;
/// * a group child with exactly one field is unwrapped to that field, unless
///   the group is named `array` or `<parent_name>_tuple`, in which case the
///   group itself is kept as a struct item;
/// * any other group child is a struct item.
///
/// The item field takes its name and nullability from the single field of a
/// repeated group named `list`, and from the repeated child itself in every
/// other layout.
///
/// Returns `None` when `fields` is empty or when the item has no arrow
/// representation.
fn to_list(fields: &[ParquetType], parent_name: &str) -> Option<DataType> {
    // Callers only guard against an empty group with `debug_assert!`, so yield
    // `None` here rather than panicking in release builds.
    let item = fields.first()?;

    let item_type = match item {
        ParquetType::PrimitiveType {
            physical_type,
            logical_type,
            converted_type,
            ..
        } => to_primitive_type_inner(physical_type, logical_type, converted_type),
        ParquetType::GroupType {
            fields: children, ..
        } => {
            // Repeated groups with these names are kept as a struct even when
            // they hold a single field. `strip_suffix` compares against
            // `<parent_name>_tuple` without building a temporary `String`.
            let keeps_struct = item.name() == "array"
                || item.name().strip_suffix("_tuple") == Some(parent_name);
            match children.first() {
                Some(only_child) if children.len() == 1 && !keeps_struct => {
                    to_data_type(only_child)?
                }
                _ => to_struct(children)?,
            }
        }
    };

    // The parquet type whose name and repetition describe the list item.
    let element = match item {
        ParquetType::GroupType {
            basic_info,
            fields: children,
            ..
        } if basic_info.name() == "list" && children.len() == 1 => {
            children.first().unwrap_or(item)
        }
        _ => item,
    };
    let item_is_optional = element.get_basic_info().repetition() != &Repetition::Required;

    Some(DataType::List(Box::new(Field::new(
        element.name(),
        item_type,
        item_is_optional,
    ))))
}
/// Converts a parquet type (primitive or group) into an arrow [`DataType`].
///
/// Primitive types always convert. A group without children has no arrow
/// counterpart and yields `None`, as does a group whose children yield no
/// convertible field.
pub(crate) fn to_data_type(type_: &ParquetType) -> Option<DataType> {
    match type_ {
        ParquetType::PrimitiveType {
            basic_info,
            physical_type,
            logical_type,
            converted_type,
        } => {
            let data_type =
                to_primitive_type(basic_info, physical_type, logical_type, converted_type);
            Some(data_type)
        }
        // An empty group has nothing to convert.
        ParquetType::GroupType { fields, .. } if fields.is_empty() => None,
        ParquetType::GroupType {
            basic_info,
            logical_type,
            converted_type,
            fields,
        } => {
            // The group's own name is passed as the parent name, which the
            // list conversion uses to recognise `<name>_tuple` items.
            let parent_name = basic_info.name();
            to_group_type(basic_info, logical_type, converted_type, fields, parent_name)
        }
    }
}
// Unit tests for the parquet -> arrow schema conversion. Each test parses a
// textual parquet schema with `SchemaDescriptor::try_from_message`, converts
// its fields with `parquet_to_arrow_schema` and compares the result against
// hand-written arrow fields.
#[cfg(test)]
mod tests {
use parquet2::metadata::SchemaDescriptor;
use super::*;
use crate::datatypes::{DataType, Field, TimeUnit};
use crate::error::Result;
// Flat schema of primitive columns: integer widths and signedness come from
// both converted-type (`INT_8`) and logical-type (`INTEGER(8,false)`)
// annotations, and both `UTF8` and `STRING` map to `Utf8`.
#[test]
fn test_flat_primitives() -> Result<()> {
let message = "
message test_schema {
REQUIRED BOOLEAN boolean;
REQUIRED INT32 int8 (INT_8);
REQUIRED INT32 int16 (INT_16);
REQUIRED INT32 uint8 (INTEGER(8,false));
REQUIRED INT32 uint16 (INTEGER(16,false));
REQUIRED INT32 int32;
REQUIRED INT64 int64 ;
OPTIONAL DOUBLE double;
OPTIONAL FLOAT float;
OPTIONAL BINARY string (UTF8);
OPTIONAL BINARY string_2 (STRING);
}
";
// REQUIRED columns are expected to be non-nullable, OPTIONAL ones nullable.
let expected = &[
Field::new("boolean", DataType::Boolean, false),
Field::new("int8", DataType::Int8, false),
Field::new("int16", DataType::Int16, false),
Field::new("uint8", DataType::UInt8, false),
Field::new("uint16", DataType::UInt16, false),
Field::new("int32", DataType::Int32, false),
Field::new("int64", DataType::Int64, false),
Field::new("double", DataType::Float64, true),
Field::new("float", DataType::Float32, true),
Field::new("string", DataType::Utf8, true),
Field::new("string_2", DataType::Utf8, true),
];
let parquet_schema = SchemaDescriptor::try_from_message(message)?;
let fields = parquet_to_arrow_schema(parquet_schema.fields());
assert_eq!(fields, expected);
Ok(())
}
// Un-annotated byte arrays: variable length maps to `Binary`, fixed length to
// `FixedSizeBinary` of the declared size.
#[test]
fn test_byte_array_fields() -> Result<()> {
let message = "
message test_schema {
REQUIRED BYTE_ARRAY binary;
REQUIRED FIXED_LEN_BYTE_ARRAY (20) fixed_binary;
}
";
let expected = vec![
Field::new("binary", DataType::Binary, false),
Field::new("fixed_binary", DataType::FixedSizeBinary(20), false),
];
let parquet_schema = SchemaDescriptor::try_from_message(message)?;
let fields = parquet_to_arrow_schema(parquet_schema.fields());
assert_eq!(fields, expected);
Ok(())
}
// NOTE(review): despite its name, the schema below declares no duplicated
// field names; it only checks two distinct primitive columns. Confirm the
// intended coverage.
#[test]
fn test_duplicate_fields() -> Result<()> {
let message = "
message test_schema {
REQUIRED BOOLEAN boolean;
REQUIRED INT32 int8 (INT_8);
}
";
let expected = &[
Field::new("boolean", DataType::Boolean, false),
Field::new("int8", DataType::Int8, false),
];
let parquet_schema = SchemaDescriptor::try_from_message(message)?;
let fields = parquet_to_arrow_schema(parquet_schema.fields());
assert_eq!(fields, expected);
Ok(())
}
// Covers the different layouts a LIST-annotated group can take, plus a bare
// repeated primitive. The expected fields are pushed in the same order as the
// groups appear in the schema.
#[test]
fn test_parquet_lists() -> Result<()> {
let mut arrow_fields = Vec::new();
let message_type = "
message test_schema {
REQUIRED GROUP my_list (LIST) {
REPEATED GROUP list {
OPTIONAL BINARY element (UTF8);
}
}
OPTIONAL GROUP my_list (LIST) {
REPEATED GROUP list {
REQUIRED BINARY element (UTF8);
}
}
OPTIONAL GROUP array_of_arrays (LIST) {
REPEATED GROUP list {
REQUIRED GROUP element (LIST) {
REPEATED GROUP list {
REQUIRED INT32 element;
}
}
}
}
OPTIONAL GROUP my_list (LIST) {
REPEATED GROUP element {
REQUIRED BINARY str (UTF8);
}
}
OPTIONAL GROUP my_list (LIST) {
REPEATED INT32 element;
}
OPTIONAL GROUP my_list (LIST) {
REPEATED GROUP element {
REQUIRED BINARY str (UTF8);
REQUIRED INT32 num;
}
}
OPTIONAL GROUP my_list (LIST) {
REPEATED GROUP array {
REQUIRED BINARY str (UTF8);
}
}
OPTIONAL GROUP my_list (LIST) {
REPEATED GROUP my_list_tuple {
REQUIRED BINARY str (UTF8);
}
}
REPEATED INT32 name;
}
";
// 1st group: required list (`list`/`element` layout) of optional strings.
{
arrow_fields.push(Field::new(
"my_list",
DataType::List(Box::new(Field::new("element", DataType::Utf8, true))),
false,
));
}
// 2nd group: optional list (`list`/`element` layout) of required strings.
{
arrow_fields.push(Field::new(
"my_list",
DataType::List(Box::new(Field::new("element", DataType::Utf8, false))),
true,
));
}
// 3rd group: optional list of required lists of required INT32.
{
let arrow_inner_list =
DataType::List(Box::new(Field::new("element", DataType::Int32, false)));
arrow_fields.push(Field::new(
"array_of_arrays",
DataType::List(Box::new(Field::new("element", arrow_inner_list, false))),
true,
));
}
// 4th group: repeated group `element` with a single field. The item takes
// the type of that field but the name and nullability of the repeated group.
{
arrow_fields.push(Field::new(
"my_list",
DataType::List(Box::new(Field::new("element", DataType::Utf8, true))),
true,
));
}
// 5th group: repeated primitive directly under the LIST group.
{
arrow_fields.push(Field::new(
"my_list",
DataType::List(Box::new(Field::new("element", DataType::Int32, true))),
true,
));
}
// 6th group: repeated group with two fields becomes a struct item.
{
let arrow_struct = DataType::Struct(vec![
Field::new("str", DataType::Utf8, false),
Field::new("num", DataType::Int32, false),
]);
arrow_fields.push(Field::new(
"my_list",
DataType::List(Box::new(Field::new("element", arrow_struct, true))),
true,
));
}
// 7th group: a single-field repeated group named `array` stays a struct item.
{
let arrow_struct = DataType::Struct(vec![Field::new("str", DataType::Utf8, false)]);
arrow_fields.push(Field::new(
"my_list",
DataType::List(Box::new(Field::new("array", arrow_struct, true))),
true,
));
}
// 8th group: a single-field repeated group named `<parent>_tuple` stays a
// struct item.
{
let arrow_struct = DataType::Struct(vec![Field::new("str", DataType::Utf8, false)]);
arrow_fields.push(Field::new(
"my_list",
DataType::List(Box::new(Field::new("my_list_tuple", arrow_struct, true))),
true,
));
}
// Last column: a top-level repeated primitive without LIST annotation becomes
// a list whose item reuses the column name.
{
arrow_fields.push(Field::new(
"name",
DataType::List(Box::new(Field::new("name", DataType::Int32, true))),
true,
));
}
let parquet_schema = SchemaDescriptor::try_from_message(message_type)?;
let fields = parquet_to_arrow_schema(parquet_schema.fields());
assert_eq!(arrow_fields, fields);
Ok(())
}
// Checks that list nullability and item nullability are tracked
// independently for the `list`/`element` layout.
#[test]
fn test_parquet_list_nullable() -> Result<()> {
let mut arrow_fields = Vec::new();
let message_type = "
message test_schema {
REQUIRED GROUP my_list1 (LIST) {
REPEATED GROUP list {
OPTIONAL BINARY element (UTF8);
}
}
OPTIONAL GROUP my_list2 (LIST) {
REPEATED GROUP list {
REQUIRED BINARY element (UTF8);
}
}
REQUIRED GROUP my_list3 (LIST) {
REPEATED GROUP list {
REQUIRED BINARY element (UTF8);
}
}
}
";
// Required list, optional items.
{
arrow_fields.push(Field::new(
"my_list1",
DataType::List(Box::new(Field::new("element", DataType::Utf8, true))),
false,
));
}
// Optional list, required items.
{
arrow_fields.push(Field::new(
"my_list2",
DataType::List(Box::new(Field::new("element", DataType::Utf8, false))),
true,
));
}
// Required list, required items.
{
arrow_fields.push(Field::new(
"my_list3",
DataType::List(Box::new(Field::new("element", DataType::Utf8, false))),
false,
));
}
let parquet_schema = SchemaDescriptor::try_from_message(message_type)?;
let fields = parquet_to_arrow_schema(parquet_schema.fields());
assert_eq!(arrow_fields, fields);
Ok(())
}
// A non-repeated, un-annotated group is expected to become a struct.
#[test]
fn test_nested_schema() -> Result<()> {
let mut arrow_fields = Vec::new();
{
let group1_fields = vec![
Field::new("leaf1", DataType::Boolean, false),
Field::new("leaf2", DataType::Int32, false),
];
let group1_struct = Field::new("group1", DataType::Struct(group1_fields), false);
arrow_fields.push(group1_struct);
let leaf3_field = Field::new("leaf3", DataType::Int64, false);
arrow_fields.push(leaf3_field);
}
let message_type = "
message test_schema {
REQUIRED GROUP group1 {
REQUIRED BOOLEAN leaf1;
REQUIRED INT32 leaf2;
}
REQUIRED INT64 leaf3;
}
";
let parquet_schema = SchemaDescriptor::try_from_message(message_type)?;
let fields = parquet_to_arrow_schema(parquet_schema.fields());
assert_eq!(arrow_fields, fields);
Ok(())
}
// Repeated groups (without LIST annotation) are expected to become lists of
// structs, with the list item reusing the group's name; this also holds when
// repeated groups are nested.
#[test]
fn test_repeated_nested_schema() -> Result<()> {
let mut arrow_fields = Vec::new();
{
arrow_fields.push(Field::new("leaf1", DataType::Int32, true));
let inner_group_list = Field::new(
"innerGroup",
DataType::List(Box::new(Field::new(
"innerGroup",
DataType::Struct(vec![Field::new("leaf3", DataType::Int32, true)]),
true,
))),
true,
);
let outer_group_list = Field::new(
"outerGroup",
DataType::List(Box::new(Field::new(
"outerGroup",
DataType::Struct(vec![
Field::new("leaf2", DataType::Int32, true),
inner_group_list,
]),
true,
))),
true,
);
arrow_fields.push(outer_group_list);
}
let message_type = "
message test_schema {
OPTIONAL INT32 leaf1;
REPEATED GROUP outerGroup {
OPTIONAL INT32 leaf2;
REPEATED GROUP innerGroup {
OPTIONAL INT32 leaf3;
}
}
}
";
let parquet_schema = SchemaDescriptor::try_from_message(message_type)?;
let fields = parquet_to_arrow_schema(parquet_schema.fields());
assert_eq!(arrow_fields, fields);
Ok(())
}
// Primitive columns with date/time/timestamp annotations, mixing
// converted-type names (`TIME_MILLIS`, `TIMESTAMP_MICROS`) and logical-type
// syntax (`TIME(NANOS,false)`, `TIMESTAMP(NANOS,true)`), plus a repeated
// primitive.
#[test]
fn test_column_desc_to_field() -> Result<()> {
let message_type = "
message test_schema {
REQUIRED BOOLEAN boolean;
REQUIRED INT32 int8 (INT_8);
REQUIRED INT32 uint8 (INTEGER(8,false));
REQUIRED INT32 int16 (INT_16);
REQUIRED INT32 uint16 (INTEGER(16,false));
REQUIRED INT32 int32;
REQUIRED INT64 int64;
OPTIONAL DOUBLE double;
OPTIONAL FLOAT float;
OPTIONAL BINARY string (UTF8);
REPEATED BOOLEAN bools;
OPTIONAL INT32 date (DATE);
OPTIONAL INT32 time_milli (TIME_MILLIS);
OPTIONAL INT64 time_micro (TIME_MICROS);
OPTIONAL INT64 time_nano (TIME(NANOS,false));
OPTIONAL INT64 ts_milli (TIMESTAMP_MILLIS);
REQUIRED INT64 ts_micro (TIMESTAMP_MICROS);
REQUIRED INT64 ts_nano (TIMESTAMP(NANOS,true));
}
";
// Only `ts_nano`, declared as UTC-adjusted, is expected to carry a timezone.
let arrow_fields = vec![
Field::new("boolean", DataType::Boolean, false),
Field::new("int8", DataType::Int8, false),
Field::new("uint8", DataType::UInt8, false),
Field::new("int16", DataType::Int16, false),
Field::new("uint16", DataType::UInt16, false),
Field::new("int32", DataType::Int32, false),
Field::new("int64", DataType::Int64, false),
Field::new("double", DataType::Float64, true),
Field::new("float", DataType::Float32, true),
Field::new("string", DataType::Utf8, true),
Field::new(
"bools",
DataType::List(Box::new(Field::new("bools", DataType::Boolean, true))),
true,
),
Field::new("date", DataType::Date32, true),
Field::new("time_milli", DataType::Time32(TimeUnit::Millisecond), true),
Field::new("time_micro", DataType::Time64(TimeUnit::Microsecond), true),
Field::new("time_nano", DataType::Time64(TimeUnit::Nanosecond), true),
Field::new(
"ts_milli",
DataType::Timestamp(TimeUnit::Millisecond, None),
true,
),
Field::new(
"ts_micro",
DataType::Timestamp(TimeUnit::Microsecond, None),
false,
),
Field::new(
"ts_nano",
DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_string())),
false,
),
];
let parquet_schema = SchemaDescriptor::try_from_message(message_type)?;
let fields = parquet_to_arrow_schema(parquet_schema.fields());
assert_eq!(arrow_fields, fields);
Ok(())
}
// NOTE(review): the name suggests an arrow -> parquet conversion, but the body
// exercises parquet -> arrow like the other tests. It combines primitives,
// LIST groups and a struct that itself contains a list.
#[test]
fn test_field_to_column_desc() -> Result<()> {
let message_type = "
message arrow_schema {
REQUIRED BOOLEAN boolean;
REQUIRED INT32 int8 (INT_8);
REQUIRED INT32 int16 (INTEGER(16,true));
REQUIRED INT32 int32;
REQUIRED INT64 int64;
OPTIONAL DOUBLE double;
OPTIONAL FLOAT float;
OPTIONAL BINARY string (STRING);
OPTIONAL GROUP bools (LIST) {
REPEATED GROUP list {
OPTIONAL BOOLEAN element;
}
}
REQUIRED GROUP bools_non_null (LIST) {
REPEATED GROUP list {
REQUIRED BOOLEAN element;
}
}
OPTIONAL INT32 date (DATE);
OPTIONAL INT32 time_milli (TIME(MILLIS,false));
OPTIONAL INT64 time_micro (TIME_MICROS);
OPTIONAL INT64 ts_milli (TIMESTAMP_MILLIS);
REQUIRED INT64 ts_micro (TIMESTAMP(MICROS,false));
REQUIRED GROUP struct {
REQUIRED BOOLEAN bools;
REQUIRED INT32 uint32 (INTEGER(32,false));
REQUIRED GROUP int32 (LIST) {
REPEATED GROUP list {
OPTIONAL INT32 element;
}
}
}
REQUIRED BINARY dictionary_strings (STRING);
}
";
let arrow_fields = vec![
Field::new("boolean", DataType::Boolean, false),
Field::new("int8", DataType::Int8, false),
Field::new("int16", DataType::Int16, false),
Field::new("int32", DataType::Int32, false),
Field::new("int64", DataType::Int64, false),
Field::new("double", DataType::Float64, true),
Field::new("float", DataType::Float32, true),
Field::new("string", DataType::Utf8, true),
Field::new(
"bools",
DataType::List(Box::new(Field::new("element", DataType::Boolean, true))),
true,
),
Field::new(
"bools_non_null",
DataType::List(Box::new(Field::new("element", DataType::Boolean, false))),
false,
),
Field::new("date", DataType::Date32, true),
Field::new("time_milli", DataType::Time32(TimeUnit::Millisecond), true),
Field::new("time_micro", DataType::Time64(TimeUnit::Microsecond), true),
Field::new(
"ts_milli",
DataType::Timestamp(TimeUnit::Millisecond, None),
true,
),
Field::new(
"ts_micro",
DataType::Timestamp(TimeUnit::Microsecond, None),
false,
),
Field::new(
"struct",
DataType::Struct(vec![
Field::new("bools", DataType::Boolean, false),
Field::new("uint32", DataType::UInt32, false),
Field::new(
"int32",
DataType::List(Box::new(Field::new("element", DataType::Int32, true))),
false,
),
]),
false,
),
Field::new("dictionary_strings", DataType::Utf8, false),
];
let parquet_schema = SchemaDescriptor::try_from_message(message_type)?;
let fields = parquet_to_arrow_schema(parquet_schema.fields());
assert_eq!(arrow_fields, fields);
Ok(())
}
}