Expand description

Provides an async API for reading parquet files as RecordBatches

use arrow::record_batch::RecordBatch;
use arrow::util::pretty::pretty_format_batches;
use futures::TryStreamExt;
use tokio::fs::File;

use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask};


let testdata = arrow::util::test_util::parquet_test_data();
let path = format!("{}/alltypes_plain.parquet", testdata);
let file = File::open(path).await.unwrap();

let builder = ParquetRecordBatchStreamBuilder::new(file)
    .await
    .unwrap()
    .with_batch_size(3);

let file_metadata = builder.metadata().file_metadata();
let mask = ProjectionMask::roots(file_metadata.schema_descr(), [1, 2, 6]);

let stream = builder.with_projection(mask).build().unwrap();
let results = stream.try_collect::<Vec<_>>().await.unwrap();
assert_eq!(results.len(), 3);

assert_batches_eq(
    &results,
    &[
        "+----------+-------------+-----------+",
        "| bool_col | tinyint_col | float_col |",
        "+----------+-------------+-----------+",
        "| true     | 0           | 0         |",
        "| false    | 1           | 1.1       |",
        "| true     | 0           | 0         |",
        "| false    | 1           | 1.1       |",
        "| true     | 0           | 0         |",
        "| false    | 1           | 1.1       |",
        "| true     | 0           | 0         |",
        "| false    | 1           | 1.1       |",
        "+----------+-------------+-----------+",
     ],
 );

Structs

An asynchronous Stream of RecordBatch for a parquet file, constructed using ParquetRecordBatchStreamBuilder

Traits

The asynchronous interface used by ParquetRecordBatchStream to read parquet files

Type Definitions

A builder used to construct a ParquetRecordBatchStream for a parquet file