Module parquet::arrow::async_reader

source ·
Expand description

Provides an async API for reading Parquet files as `RecordBatch`es.

// Open the sample Parquet file from the shared test-data directory.
let test_data_dir = arrow::util::test_util::parquet_test_data();
let parquet_path = format!("{}/alltypes_plain.parquet", test_data_dir);
let input = File::open(parquet_path).await.unwrap();

// Create the stream builder, then cap each emitted batch at 3 rows.
let builder = ParquetRecordBatchStreamBuilder::new(input).await.unwrap();
let builder = builder.with_batch_size(3);

// Select root columns 1, 2 and 6 of the file schema; per the expected output
// below these are `bool_col`, `tinyint_col` and `float_col`.
let projection = ProjectionMask::roots(
    builder.metadata().file_metadata().schema_descr(),
    [1, 2, 6],
);

// Build the stream and drain it into a vector of record batches.
let batch_stream = builder.with_projection(projection).build().unwrap();
let batches: Vec<_> = batch_stream.try_collect().await.unwrap();

// The 8 expected rows arrive as 3 batches when the batch size is 3.
assert_eq!(batches.len(), 3);

let expected = [
    "+----------+-------------+-----------+",
    "| bool_col | tinyint_col | float_col |",
    "+----------+-------------+-----------+",
    "| true     | 0           | 0.0       |",
    "| false    | 1           | 1.1       |",
    "| true     | 0           | 0.0       |",
    "| false    | 1           | 1.1       |",
    "| true     | 0           | 0.0       |",
    "| false    | 1           | 1.1       |",
    "| true     | 0           | 0.0       |",
    "| false    | 1           | 1.1       |",
    "+----------+-------------+-----------+",
];
assert_batches_eq(&batches, &expected);

Structs§

Traits§

Functions§

Type Aliases§