Module parquet::arrow::async_reader
source · [−]Expand description
Provides an `async` API for reading parquet files as a stream of `RecordBatch`es.
use arrow::record_batch::RecordBatch;
use arrow::util::pretty::pretty_format_batches;
use futures::TryStreamExt;
use tokio::fs::File;
use parquet::arrow::ParquetRecordBatchStreamBuilder;
let testdata = arrow::util::test_util::parquet_test_data();
let path = format!("{}/alltypes_plain.parquet", testdata);
let file = tokio::fs::File::open(path).await.unwrap();
let builder = ParquetRecordBatchStreamBuilder::new(file)
.await
.unwrap()
.with_projection(vec![1, 2, 6])
.with_batch_size(3);
let stream = builder.build().unwrap();
let results = stream.try_collect::<Vec<_>>().await.unwrap();
assert_eq!(results.len(), 3);
assert_batches_eq(
&results,
&[
"+----------+-------------+-----------+",
"| bool_col | tinyint_col | float_col |",
"+----------+-------------+-----------+",
"| true     | 0           | 0         |",
"| false    | 1           | 1.1       |",
"| true     | 0           | 0         |",
"| false    | 1           | 1.1       |",
"| true     | 0           | 0         |",
"| false    | 1           | 1.1       |",
"| true     | 0           | 0         |",
"| false    | 1           | 1.1       |",
"+----------+-------------+-----------+",
],
);
Structs
An asynchronous `Stream` of `RecordBatch` for a parquet file
A builder used to construct a `ParquetRecordBatchStream` for a parquet file