Module parquet::arrow

Expand description

Apache Arrow is a cross-language development platform for in-memory data.

This module provides an API for converting between Arrow and Parquet.

Example of reading a Parquet file into Arrow record batches:

use arrow::record_batch::RecordBatchReader;
use parquet::file::reader::SerializedFileReader;
use parquet::arrow::{ParquetFileArrowReader, ArrowReader};
use std::sync::Arc;
use std::fs::File;

// Open the Parquet file and wrap it in a serialized file reader, which is then
// shared with the Arrow reader through an `Arc`.
let file = File::open("parquet.file").unwrap();
let file_reader = SerializedFileReader::new(file).unwrap();
let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(file_reader));

// Print the Arrow schema for the whole file, then for a projection onto the
// columns selected by the given indices.
println!("Converted arrow schema is: {}", arrow_reader.get_schema().unwrap());
println!("Arrow schema after projection is: {}",
    arrow_reader.get_schema_by_columns(vec![2, 4, 6], true).unwrap());

// NOTE(review): 2048 is presumably the number of rows per record batch — confirm
// against the `get_record_reader` documentation.
// The reader is moved into the `for` loop below, so it does not need to be `mut`.
let record_batch_reader = arrow_reader.get_record_reader(2048).unwrap();

// Each item is a `Result`; unwrap it to get the record batch or panic on error.
for maybe_record_batch in record_batch_reader {
    let record_batch = maybe_record_batch.unwrap();
    if record_batch.num_rows() > 0 {
        println!("Read {} records.", record_batch.num_rows());
    } else {
        println!("End of file!");
    }
}

Re-exports

pub use self::arrow_reader::ArrowReader;
pub use self::arrow_reader::ParquetFileArrowReader;
pub use self::arrow_writer::ArrowWriter;
pub use self::schema::arrow_to_parquet_schema;
pub use self::schema::parquet_to_arrow_schema;
pub use self::schema::parquet_to_arrow_schema_by_columns;
pub use self::schema::parquet_to_arrow_schema_by_root_columns;

Modules

arrow_reader

Contains a reader that reads Parquet data into Arrow arrays.

arrow_writer

Contains a writer that writes Arrow data into Parquet data.

schema

Provides an API for converting a Parquet schema to an Arrow schema and vice versa.

Constants

ARROW_SCHEMA_META_KEY

Schema metadata key used to store the serialized Arrow IPC schema.