[][src]Module parquet::arrow

Apache Arrow is a cross-language development platform for in-memory data.

This module provides an API for converting between Arrow and Parquet.

Example of reading a Parquet file into Arrow record batches:

use arrow::record_batch::RecordBatchReader;
use parquet::file::reader::SerializedFileReader;
use parquet::arrow::{ParquetFileArrowReader, ArrowReader};
use std::rc::Rc;
use std::fs::File;

// Open the Parquet file and wrap it in a file reader; the Arrow reader takes
// the file reader behind an `Rc`.
let file = File::open("parquet.file").unwrap();
let file_reader = SerializedFileReader::new(file).unwrap();
let mut arrow_reader = ParquetFileArrowReader::new(Rc::new(file_reader));

// Print the full converted schema, then the schema restricted to a projection
// of columns selected by index.
println!("Converted arrow schema is: {}", arrow_reader.get_schema().unwrap());
println!("Arrow schema after projection is: {}",
    arrow_reader.get_schema_by_columns(vec![2, 4, 6]).unwrap());

// Read the file in record batches; 2048 is the batch size passed to the reader.
let mut record_batch_reader = arrow_reader.get_record_reader(2048).unwrap();

// `next_batch` yields a `Result` wrapping an `Option`. Treat both `None` and
// an empty batch as the end of input and leave the loop, instead of unwrapping
// the `Option` (which panics on `None`) and looping forever.
loop {
    match record_batch_reader.next_batch().unwrap() {
        Some(record_batch) if record_batch.num_rows() > 0 => {
            println!("Read {} records.", record_batch.num_rows());
        }
        _ => {
            println!("End of file!");
            break;
        }
    }
}

Re-exports

pub use self::arrow_reader::ArrowReader;
pub use self::arrow_reader::ParquetFileArrowReader;
pub use self::schema::parquet_to_arrow_schema;
pub use self::schema::parquet_to_arrow_schema_by_columns;

Modules

arrow_reader

Contains a reader which reads Parquet data into Arrow arrays.

schema

Provides an API for converting a Parquet schema to an Arrow schema and vice versa.