pub struct ParquetExec { /* private fields */ }
Expand description

Execution plan for scanning one or more Parquet partitions

Implementations§

source§

impl ParquetExec

source

pub fn new( base_config: FileScanConfig, predicate: Option<Expr>, metadata_size_hint: Option<usize> ) -> Self

Create a new Parquet reader execution plan from the provided file list and schema.

source

pub fn base_config(&self) -> &FileScanConfig

Reference to the base configuration

source

pub fn pruning_predicate(&self) -> Option<&Arc<PruningPredicate>>

Optional reference to this parquet scan’s pruning predicate

source

pub fn with_parquet_file_reader_factory( self, parquet_file_reader_factory: Arc<dyn ParquetFileReaderFactory> ) -> Self

Optional user defined parquet file reader factory.

ParquetFileReaderFactory complements TableProvider; it enables users to provide custom implementations for data access operations.

If custom ParquetFileReaderFactory is provided, then data access operations will be routed to this factory instead of ObjectStore.

source

pub fn with_pushdown_filters(self, pushdown_filters: bool) -> Self

If true, any filter Exprs on the scan will be converted to a RowFilter in the ParquetRecordBatchStream. These filters are applied by the parquet decoder to skip unnecessarily decoding other columns which would not pass the predicate. Defaults to false.

source

pub fn with_reorder_filters(self, reorder_filters: bool) -> Self

If true, the RowFilter made by pushdown_filters may try to minimize the cost of filter evaluation by reordering the predicate Exprs. If false, the predicates are applied in the same order as specified in the query. Defaults to false.

source

pub fn with_enable_page_index(self, enable_page_index: bool) -> Self

If enabled, the reader will read the page index. This is used to optimise filter pushdown via RowSelector and RowFilter by eliminating unnecessary IO and decoding.

source

pub fn get_repartitioned( &self, target_partitions: usize, repartition_file_min_size: usize ) -> Self

Redistribute files across partitions according to their size

Trait Implementations§

source§

impl Clone for ParquetExec

source§

fn clone(&self) -> ParquetExec

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Debug for ParquetExec

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl ExecutionPlan for ParquetExec

source§

fn as_any(&self) -> &dyn Any

Return a reference to Any that can be used for downcasting

source§

fn output_partitioning(&self) -> Partitioning

Get the output partitioning of this plan

source§

fn schema(&self) -> SchemaRef

Get the schema for this execution plan
source§

fn children(&self) -> Vec<Arc<dyn ExecutionPlan>>

Get a list of child execution plans that provide the input for this plan. The returned list will be empty for leaf nodes, will contain a single value for unary nodes, or two values for binary nodes (such as joins).
source§

fn output_ordering(&self) -> Option<&[PhysicalSortExpr]>

If the output of this operator within each partition is sorted, returns Some(keys) with the description of how it was sorted. Read more
source§

fn with_new_children( self: Arc<Self>, _: Vec<Arc<dyn ExecutionPlan>> ) -> Result<Arc<dyn ExecutionPlan>>

Returns a new plan where all children were replaced by new plans.
source§

fn execute( &self, partition_index: usize, ctx: Arc<TaskContext> ) -> Result<SendableRecordBatchStream>

Creates a stream of record batches for the partition identified by partition_index.
source§

fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter<'_>) -> Result

Format this ExecutionPlan to f in the specified type. Read more
source§

fn metrics(&self) -> Option<MetricsSet>

Return a snapshot of the set of Metrics for this ExecutionPlan. Read more
source§

fn statistics(&self) -> Statistics

Returns the global output statistics for this ExecutionPlan node.
source§

fn unbounded_output(&self, _children: &[bool]) -> Result<bool>

Specifies whether this plan generates an infinite stream of records. If the plan does not support pipelining, but its input(s) are infinite, returns an error to indicate this.
source§

fn required_input_distribution(&self) -> Vec<Distribution>

Specifies the data distribution requirements for all the children of this operator. By default it’s [Distribution::UnspecifiedDistribution] for each child.
source§

fn required_input_ordering(&self) -> Vec<Option<&[PhysicalSortExpr]>>

Specifies the ordering requirements for all of the children. For each child, it’s the local ordering requirement within each partition rather than the global ordering. Read more
source§

fn maintains_input_order(&self) -> Vec<bool>

Returns false if this operator’s implementation may reorder rows within or between partitions. Read more
source§

fn benefits_from_input_partitioning(&self) -> bool

Returns true if this operator would benefit from partitioning its input (and thus from more parallelism). For operators that do very little work the overhead of extra parallelism may outweigh any benefits Read more
source§

fn equivalence_properties(&self) -> EquivalenceProperties

Get the EquivalenceProperties within the plan

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T where T: ?Sized,

const: unstable · source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T where T: ?Sized,

const: unstable · source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

const: unstable · source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T> Instrument for T

source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
source§

impl<T, U> Into<U> for T where U: From<T>,

const: unstable · source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> Same<T> for T

§

type Output = T

Should always be Self
source§

impl<T> ToOwned for T where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
const: unstable · source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
const: unstable · source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<V, T> VZip<V> for T where V: MultiLane<T>,

§

fn vzip(self) -> V

source§

impl<T> WithSubscriber for T

source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more