Module parquet::arrow::async_reader
source · [−]Expand description
Provides an `async` API for reading parquet files as `RecordBatch`es.
use arrow::record_batch::RecordBatch;
use arrow::util::pretty::pretty_format_batches;
use futures::TryStreamExt;
use tokio::fs::File;
use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask};
let testdata = arrow::util::test_util::parquet_test_data();
let path = format!("{}/alltypes_plain.parquet", testdata);
let file = File::open(path).await.unwrap();
let builder = ParquetRecordBatchStreamBuilder::new(file)
.await
.unwrap()
.with_batch_size(3);
let file_metadata = builder.metadata().file_metadata();
let mask = ProjectionMask::roots(file_metadata.schema_descr(), [1, 2, 6]);
let stream = builder.with_projection(mask).build().unwrap();
let results = stream.try_collect::<Vec<_>>().await.unwrap();
assert_eq!(results.len(), 3);
assert_batches_eq(
&results,
&[
"+----------+-------------+-----------+",
"| bool_col | tinyint_col | float_col |",
"+----------+-------------+-----------+",
"| true | 0 | 0 |",
"| false | 1 | 1.1 |",
"| true | 0 | 0 |",
"| false | 1 | 1.1 |",
"| true | 0 | 0 |",
"| false | 1 | 1.1 |",
"| true | 0 | 0 |",
"| false | 1 | 1.1 |",
"+----------+-------------+-----------+",
],
);
Structs
An asynchronous `Stream` of `RecordBatch` for a parquet file that can be constructed using `ParquetRecordBatchStreamBuilder`
Traits
The asynchronous interface used by `ParquetRecordBatchStream` to read parquet files
Type Definitions
A builder used to construct a `ParquetRecordBatchStream` for a parquet file