Module parquet::schema

source ·
Expand description

Parquet schema definitions, together with methods to print and parse a schema.

Example

use parquet::{
    basic::{ConvertedType, Repetition, Type as PhysicalType},
    schema::{parser, printer, types::Type},
};
use std::sync::Arc;

// Target schema, written in Parquet message-type notation:
//
// message schema {
//   OPTIONAL BYTE_ARRAY a (UTF8);
//   REQUIRED INT32 b;
// }

// Leaf column `a`: an OPTIONAL BYTE_ARRAY carrying the UTF8 converted type.
let utf8_column = Type::primitive_type_builder("a", PhysicalType::BYTE_ARRAY)
    .with_converted_type(ConvertedType::UTF8)
    .with_repetition(Repetition::OPTIONAL)
    .build()
    .unwrap();

// Leaf column `b`: a REQUIRED INT32 with no converted type.
let int_column = Type::primitive_type_builder("b", PhysicalType::INT32)
    .with_repetition(Repetition::REQUIRED)
    .build()
    .unwrap();

// Root group named "schema" that holds both leaf columns, in declaration order.
let mut columns = vec![Arc::new(utf8_column), Arc::new(int_column)];
let root = Type::group_type_builder("schema")
    .with_fields(&mut columns)
    .build()
    .unwrap();

// Render the schema as text into an in-memory byte buffer.
let mut rendered: Vec<u8> = Vec::new();
printer::print_schema(&mut rendered, &root);

// Decode the printed bytes and parse them back into a schema.
let text = String::from_utf8(rendered).unwrap();
let reparsed = parser::parse_message_type(&text).unwrap();

// The print -> parse round trip must yield a schema equal to the one we built.
assert_eq!(root, reparsed);

Modules

  • parser: Parquet schema parser. Provides methods to parse and validate a string message type into a Parquet Type.
  • printer: Parquet schema printer. Provides methods to print a Parquet file schema and list file metadata.
  • types: Contains structs and methods to build a Parquet schema and schema descriptors.