[][src]Module parquet::file

Main entry point for working with the Parquet API.

Provides access to file and row group readers and writers, the record API, metadata, etc.

See reader::SerializedFileReader or writer::SerializedFileWriter for a starting reference, metadata::ParquetMetaData for file metadata, and statistics for working with statistics.

Example of writing a new file

use std::{fs, path::Path, rc::Rc};

use parquet::{
    file::{
        properties::WriterProperties,
        writer::{FileWriter, SerializedFileWriter},
    },
    schema::parser::parse_message_type,
};

// Destination of the new Parquet file.
let target = Path::new("/path/to/sample.parquet");

// Schema with a single required INT32 column named `b`.
let schema_text = "
  message schema {
    REQUIRED INT32 b;
  }
";
let parsed_schema = parse_message_type(schema_text).unwrap();
let schema_ref = Rc::new(parsed_schema);
let writer_props = Rc::new(WriterProperties::builder().build());

let output = fs::File::create(&target).unwrap();
let mut file_writer = SerializedFileWriter::new(output, schema_ref, writer_props).unwrap();

// Write one row group, taking each column writer in turn and closing it
// before asking for the next one.
let mut rg_writer = file_writer.next_row_group().unwrap();
while let Some(mut column_writer) = rg_writer.next_column().unwrap() {
    // ... write values to a column writer
    rg_writer.close_column(column_writer).unwrap();
}
file_writer.close_row_group(rg_writer).unwrap();
file_writer.close().unwrap();

// The written file is expected to start with the 4-byte magic "PAR1".
let contents = fs::read(&target).unwrap();
assert_eq!(&contents[..4], b"PAR1");

Example of reading an existing file

use parquet::file::reader::{FileReader, SerializedFileReader};
use std::{fs::File, path::Path};

let path = Path::new("/path/to/sample.parquet");
// Run the example only when the sample file can be opened. The handle
// returned by this single `File::open` call is handed straight to the
// reader, so the file is not opened a second time.
if let Ok(file) = File::open(&path) {
    let reader = SerializedFileReader::new(file).unwrap();

    // File-level metadata: the sample is expected to hold one row group.
    let parquet_metadata = reader.metadata();
    assert_eq!(parquet_metadata.num_row_groups(), 1);

    // The first row group is expected to contain a single column.
    let row_group_reader = reader.get_row_group(0).unwrap();
    assert_eq!(row_group_reader.num_columns(), 1);
}

Example of reading multiple files

use parquet::file::reader::SerializedFileReader;
use std::convert::TryFrom;

let part_files = vec![
    "/path/to/sample.parquet/part-1.snappy.parquet",
    "/path/to/sample.parquet/part-2.snappy.parquet"
];
// Build one reader per path; the iterator is lazy, so each file is only
// opened when its rows are about to be consumed.
let readers = part_files
    .iter()
    .map(|path| SerializedFileReader::try_from(*path).unwrap());
// Chain the rows of every reader into a single flat iterator.
let rows = readers.flat_map(|reader| reader.into_iter());

// Print every row, file by file.
rows.for_each(|row| println!("{}", row));

Modules

metadata

Contains information about available Parquet metadata.

properties

Writer properties.

reader

Contains the file reader API and provides methods to access file metadata, obtain row group readers for reading individual column chunks, or access the record iterator.

statistics

Contains definitions for working with Parquet statistics.

writer

Contains the file writer API and provides methods to write row groups and columns by using row group writers and column writers respectively.