Type Alias parquet::arrow::async_reader::ParquetRecordBatchStreamBuilder
source · pub type ParquetRecordBatchStreamBuilder<T> = ArrowReaderBuilder<AsyncReader<T>>;
Expand description
A builder used to construct a ParquetRecordBatchStream
for a parquet file
In particular, this handles reading the parquet file metadata, allowing consumers to use this information to select which specific columns, row groups, etc. they wish to be read by the resulting stream.
See ArrowReaderBuilder
for additional member functions
Aliased Type§
struct ParquetRecordBatchStreamBuilder<T> { /* private fields */ }
Implementations§
source§impl<T: AsyncFileReader + Send + 'static> ParquetRecordBatchStreamBuilder<T>
impl<T: AsyncFileReader + Send + 'static> ParquetRecordBatchStreamBuilder<T>
source · pub async fn new(input: T) -> Result<Self>
pub async fn new(input: T) -> Result<Self>
Create a new ParquetRecordBatchStreamBuilder
with the provided parquet file
source · pub async fn new_with_options(
input: T,
options: ArrowReaderOptions
) -> Result<Self>
pub async fn new_with_options( input: T, options: ArrowReaderOptions ) -> Result<Self>
Create a new ParquetRecordBatchStreamBuilder
with the provided parquet file
and ArrowReaderOptions
source · pub fn new_with_metadata(input: T, metadata: ArrowReaderMetadata) -> Self
pub fn new_with_metadata(input: T, metadata: ArrowReaderMetadata) -> Self
Create a ParquetRecordBatchStreamBuilder
from the provided ArrowReaderMetadata
This allows loading metadata once and using it to create multiple builders with potentially different settings.
let mut file = tempfile().unwrap();
let mut file = tokio::fs::File::from_std(file);
let meta = ArrowReaderMetadata::load_async(&mut file, Default::default()).await.unwrap();
let mut a = ParquetRecordBatchStreamBuilder::new_with_metadata(
file.try_clone().await.unwrap(),
meta.clone()
).build().unwrap();
let mut b = ParquetRecordBatchStreamBuilder::new_with_metadata(file, meta).build().unwrap();
// Should be able to read from both in parallel
assert_eq!(a.next().await.unwrap().unwrap(), b.next().await.unwrap().unwrap());
source · pub async fn get_row_group_column_bloom_filter(
&mut self,
row_group_idx: usize,
column_idx: usize
) -> Result<Option<Sbbf>>
pub async fn get_row_group_column_bloom_filter( &mut self, row_group_idx: usize, column_idx: usize ) -> Result<Option<Sbbf>>
Read bloom filter for a column in a row group
Returns None
if the column does not have a bloom filter
This function should be called after other forms of pruning, such as projection and predicate pushdown.
source · pub fn build(self) -> Result<ParquetRecordBatchStream<T>>
pub fn build(self) -> Result<ParquetRecordBatchStream<T>>
Build a new ParquetRecordBatchStream