pub type ParquetRecordBatchStreamBuilder<T> = ArrowReaderBuilder<AsyncReader<T>>;

A builder used to construct a ParquetRecordBatchStream for a parquet file

In particular, this handles reading the parquet file metadata, allowing consumers to use this information to select which columns, row groups, etc. they wish the resulting stream to read

See ArrowReaderBuilder for additional member functions
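For orientation, here is a minimal sketch of the typical flow, assuming a local file path and illustrative column/row-group indices (the function name and the specific selections are assumptions, not part of this API): open the file, let the builder load the metadata, then use that metadata to restrict what the stream decodes.

use futures::TryStreamExt;
use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask};

async fn read_selected(path: &str) -> Result<(), Box<dyn std::error::Error>> {
    // tokio::fs::File implements AsyncFileReader, so it can be used as input
    let file = tokio::fs::File::open(path).await?;

    // Reads the footer/metadata (schema, row groups, statistics) up front
    let builder = ParquetRecordBatchStreamBuilder::new(file).await?;

    // Use the metadata to decide what to decode: here the first two root
    // columns and only the first row group (illustrative choices)
    let mask = ProjectionMask::roots(builder.parquet_schema(), [0, 1]);
    let mut stream = builder
        .with_projection(mask)
        .with_row_groups(vec![0])
        .with_batch_size(8192)
        .build()?;

    while let Some(batch) = stream.try_next().await? {
        println!("read {} rows", batch.num_rows());
    }
    Ok(())
}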

Aliased Type

struct ParquetRecordBatchStreamBuilder<T> { /* private fields */ }

Implementations

impl<T: AsyncFileReader + Send + 'static> ParquetRecordBatchStreamBuilder<T>

pub async fn new(input: T) -> Result<Self>

Create a new ParquetRecordBatchStreamBuilder with the provided parquet file

pub async fn new_with_options(input: T, options: ArrowReaderOptions) -> Result<Self>

Create a new ParquetRecordBatchStreamBuilder with the provided parquet file and ArrowReaderOptions
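As a sketch of how the options might be used, the snippet below opts into loading the page index along with the rest of the metadata; the file path and function name are assumptions for illustration.

use parquet::arrow::ParquetRecordBatchStreamBuilder;
use parquet::arrow::arrow_reader::ArrowReaderOptions;

async fn open_with_page_index(path: &str) -> Result<(), Box<dyn std::error::Error>> {
    let file = tokio::fs::File::open(path).await?;

    // Request that the page index be loaded alongside the file metadata
    let options = ArrowReaderOptions::new().with_page_index(true);
    let builder = ParquetRecordBatchStreamBuilder::new_with_options(file, options).await?;

    println!("row groups: {}", builder.metadata().num_row_groups());
    Ok(())
}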

pub fn new_with_metadata(input: T, metadata: ArrowReaderMetadata) -> Self

Create a ParquetRecordBatchStreamBuilder from the provided ArrowReaderMetadata

This allows loading metadata once and using it to create multiple builders with potentially different settings

use futures::StreamExt;
use parquet::arrow::ParquetRecordBatchStreamBuilder;
use parquet::arrow::arrow_reader::ArrowReaderMetadata;
use tempfile::tempfile;

let mut file = tempfile().unwrap();
// (Parquet data is assumed to have already been written to `file`; the write step is elided here)
let mut file = tokio::fs::File::from_std(file);

// Load the metadata once...
let meta = ArrowReaderMetadata::load_async(&mut file, Default::default()).await.unwrap();

// ...then use it to create multiple independent streams
let mut a = ParquetRecordBatchStreamBuilder::new_with_metadata(
    file.try_clone().await.unwrap(),
    meta.clone()
).build().unwrap();
let mut b = ParquetRecordBatchStreamBuilder::new_with_metadata(file, meta).build().unwrap();

// Should be able to read from both in parallel
assert_eq!(a.next().await.unwrap().unwrap(), b.next().await.unwrap().unwrap());

pub async fn get_row_group_column_bloom_filter(&mut self, row_group_idx: usize, column_idx: usize) -> Result<Option<Sbbf>>

Read the bloom filter for a column in a row group. Returns None if the column does not have a bloom filter

This function should be called after other forms of pruning, such as projection and predicate pushdown.
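A minimal sketch of how the returned filter might be used, assuming row group 0, column 0, and an INT64 column (all illustrative, as is the function name): a negative check proves the value is absent from that row group, so it can be skipped, while a positive check only says the value may be present.

use parquet::arrow::ParquetRecordBatchStreamBuilder;

async fn row_group_may_contain(path: &str, value: i64) -> Result<bool, Box<dyn std::error::Error>> {
    let file = tokio::fs::File::open(path).await?;
    let mut builder = ParquetRecordBatchStreamBuilder::new(file).await?;

    // Fetch the bloom filter for column 0 of row group 0, if one was written
    match builder.get_row_group_column_bloom_filter(0, 0).await? {
        // `check` returning false guarantees the value is not in this row group
        Some(sbbf) => Ok(sbbf.check(&value)),
        // No bloom filter: we cannot rule the value out
        None => Ok(true),
    }
}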

pub fn build(self) -> Result<ParquetRecordBatchStream<T>>

Build a new ParquetRecordBatchStream