Module parquet::file

Expand description

Main entry point for working with the Parquet API.

Provides access to file and row group readers and writers, record API, metadata, etc.

See reader::SerializedFileReader or writer::SerializedFileWriter for a starting reference, metadata::ParquetMetaData for file metadata, and statistics for working with statistics.

Example of writing a new file

use std::{fs, path::Path, sync::Arc};

use parquet::{
    file::{
        properties::WriterProperties,
        writer::{FileWriter, SerializedFileWriter},
    },
    schema::parser::parse_message_type,
};

// Location the new Parquet file is written to (and later read back from).
let path = Path::new("/path/to/sample.parquet");

// Textual schema: a message named `schema` with a single required INT32 column `b`.
let message_type = "
  message schema {
    REQUIRED INT32 b;
  }
";
// Parse the schema text; the parsed schema and the writer properties are each
// wrapped in an `Arc` before being handed to the writer.
let schema = Arc::new(parse_message_type(message_type).unwrap());
// Writer properties built without overriding any builder setting.
let props = Arc::new(WriterProperties::builder().build());
let file = fs::File::create(&path).unwrap();
let mut writer = SerializedFileWriter::new(file, schema, props).unwrap();
// Open one row group and visit its column writers one at a time, until
// `next_column` yields `None`.
let mut row_group_writer = writer.next_row_group().unwrap();
while let Some(mut col_writer) = row_group_writer.next_column().unwrap() {
    // ... write values to a column writer
    // Return the column writer to the row group writer; the loop then asks
    // for the next column.
    row_group_writer.close_column(col_writer).unwrap();
}
// Close the row group first, then the file writer itself.
writer.close_row_group(row_group_writer).unwrap();
writer.close().unwrap();

// Read the file back and check that it begins with the four bytes `PAR1`.
let bytes = fs::read(&path).unwrap();
assert_eq!(&bytes[0..4], &[b'P', b'A', b'R', b'1']);

Example of reading an existing file

use parquet::file::reader::{FileReader, SerializedFileReader};
use std::{fs::File, path::Path};

let sample = Path::new("/path/to/sample.parquet");
// The checks below only run when the sample file can actually be opened.
if let Ok(handle) = File::open(&sample) {
    let file_reader = SerializedFileReader::new(handle).unwrap();

    // File-level metadata: the sample is expected to hold exactly one row group.
    let row_group_count = file_reader.metadata().num_row_groups();
    assert_eq!(row_group_count, 1);

    // The first (and only) row group is expected to hold exactly one column.
    let column_count = file_reader.get_row_group(0).unwrap().num_columns();
    assert_eq!(column_count, 1);
}

Example of reading multiple files

use parquet::file::reader::SerializedFileReader;
use std::convert::TryFrom;

let part_files = vec![
    "/path/to/sample.parquet/part-1.snappy.parquet",
    "/path/to/sample.parquet/part-2.snappy.parquet",
];
// Lazily open one reader per part file ...
let readers = part_files
    .iter()
    .map(|part| SerializedFileReader::try_from(*part).unwrap());
// ... and chain the row iterators of all readers into a single stream.
let all_rows = readers.flat_map(|reader| reader.into_iter());

// Print every row, file by file, in the order the paths are listed.
all_rows.for_each(|record| println!("{}", record));

Modules

footer
metadata

Contains information about available Parquet metadata.

properties

Writer properties.

reader

Contains the file reader API and provides methods to access file metadata, row group readers to read individual column chunks, or a record iterator.

serialized_reader

Contains implementations of the reader traits FileReader, RowGroupReader and PageReader. Also contains implementations of ChunkReader for files (with buffering) and byte arrays (RAM).

statistics

Contains definitions for working with Parquet statistics.

writer

Contains file writer API, and provides methods to write row groups and columns by using row group writers and column writers respectively.