pub struct ParquetFormat { /* private fields */ }
Expand description

The Apache Parquet FileFormat implementation

Note: it is recommended that these options be configured on the ConfigOptions associated with the SessionState rather than overridden on a per-format basis.

TODO: Deprecate and remove overrides https://github.com/apache/arrow-datafusion/issues/4349

Implementations§

source§

impl ParquetFormat

source

pub fn new() -> Self

Construct a new Format with no local overrides

source

pub fn with_enable_pruning(self, enable: Option<bool>) -> Self

Activate statistics based row group level pruning

  • If None, defaults to value on config_options
source

pub fn enable_pruning(&self, config_options: &ConfigOptions) -> bool

Return true if pruning is enabled

source

pub fn with_metadata_size_hint(self, size_hint: Option<usize>) -> Self

Provide a hint to the size of the file metadata. If a hint is provided, the reader will try to fetch the last size_hint bytes of the parquet file optimistically. Without a hint, two reads are required: one to fetch the 8-byte parquet footer, and then another to fetch the metadata, whose length is encoded in the footer.

  • If None, defaults to value on config_options
source

pub fn metadata_size_hint( &self, config_options: &ConfigOptions ) -> Option<usize>

Return the metadata size hint if set

source

pub fn with_skip_metadata(self, skip_metadata: Option<bool>) -> Self

Tell the parquet reader to skip any metadata that may be in the file Schema. This can help avoid schema conflicts due to metadata.

  • If None, defaults to value on config_options
source

pub fn skip_metadata(&self, config_options: &ConfigOptions) -> bool

Returns true if schema metadata will be cleared prior to schema merging.

Trait Implementations§

source§

impl Debug for ParquetFormat

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl Default for ParquetFormat

source§

fn default() -> ParquetFormat

Returns the “default value” for a type. Read more
source§

impl FileFormat for ParquetFormat

source§

fn as_any(&self) -> &dyn Any

Returns the file format as Any so that it can be downcast to a specific implementation.
source§

fn infer_schema<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, state: &'life1 SessionState, store: &'life2 Arc<dyn ObjectStore>, objects: &'life3 [ObjectMeta] ) -> Pin<Box<dyn Future<Output = Result<SchemaRef>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Infer the common schema of the provided objects. The objects will usually be analysed up to a given number of records or files (as specified in the format config) then give the estimated common schema. This might fail if the files have schemas that cannot be merged.
source§

fn infer_stats<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, _state: &'life1 SessionState, store: &'life2 Arc<dyn ObjectStore>, table_schema: SchemaRef, object: &'life3 ObjectMeta ) -> Pin<Box<dyn Future<Output = Result<Statistics>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Infer the statistics for the provided object. The cost and accuracy of the estimated statistics might vary greatly between file formats. Read more
source§

fn create_physical_plan<'life0, 'life1, 'life2, 'async_trait>( &'life0 self, state: &'life1 SessionState, conf: FileScanConfig, filters: Option<&'life2 Arc<dyn PhysicalExpr>> ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait,

Take a list of files and convert it to the appropriate executor according to this file format.
source§

fn create_writer_physical_plan<'life0, 'life1, 'async_trait>( &'life0 self, input: Arc<dyn ExecutionPlan>, _state: &'life1 SessionState, conf: FileSinkConfig, order_requirements: Option<Vec<PhysicalSortRequirement>> ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Take a list of files and the configuration to convert it to the appropriate writer executor according to this file format.
source§

fn file_type(&self) -> FileType

Returns the FileType corresponding to this FileFormat

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> Same for T

§

type Output = T

Should always be Self
source§

impl<T, U> TryFrom<U> for T where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<V, T> VZip<V> for T where V: MultiLane<T>,

§

fn vzip(self) -> V

§

impl<T> Allocation for T where T: RefUnwindSafe + Send + Sync,