pub struct ParquetFormat { /* private fields */ }
Expand description
The Apache Parquet FileFormat
implementation
Implementations§
Source§impl ParquetFormat
impl ParquetFormat
Sourcepub fn with_enable_pruning(self, enable: bool) -> Self
pub fn with_enable_pruning(self, enable: bool) -> Self
Activate statistics based row group level pruning
- If None, defaults to the value on config_options
Sourcepub fn enable_pruning(&self) -> bool
pub fn enable_pruning(&self) -> bool
Return true
if pruning is enabled
Sourcepub fn with_metadata_size_hint(self, size_hint: Option<usize>) -> Self
pub fn with_metadata_size_hint(self, size_hint: Option<usize>) -> Self
Provide a hint to the size of the file metadata. If a hint is provided
the reader will try and fetch the last size_hint
bytes of the parquet file optimistically.
Without a hint, two reads are required: one read to fetch the 8-byte parquet footer, and then
another read to fetch the metadata, whose length is encoded in the footer.
- If None, defaults to the value on config_options
Sourcepub fn metadata_size_hint(&self) -> Option<usize>
pub fn metadata_size_hint(&self) -> Option<usize>
Return the metadata size hint if set
Sourcepub fn with_skip_metadata(self, skip_metadata: bool) -> Self
pub fn with_skip_metadata(self, skip_metadata: bool) -> Self
Tell the parquet reader to skip any metadata that may be in the file Schema. This can help avoid schema conflicts due to metadata.
- If None, defaults to the value on config_options
Sourcepub fn skip_metadata(&self) -> bool
pub fn skip_metadata(&self) -> bool
Returns true
if schema metadata will be cleared prior to
schema merging.
Sourcepub fn with_options(self, options: TableParquetOptions) -> Self
pub fn with_options(self, options: TableParquetOptions) -> Self
Set Parquet options for the ParquetFormat
Sourcepub fn options(&self) -> &TableParquetOptions
pub fn options(&self) -> &TableParquetOptions
Parquet options
Sourcepub fn force_view_types(&self) -> bool
pub fn force_view_types(&self) -> bool
Return true
if view types should be used.
If this returns true, DataFusion will instruct the parquet reader
to read string / binary columns using the view types StringView
or BinaryView
if the table schema specifies those types, regardless of any embedded metadata
that may specify an alternate Arrow type. The parquet reader is optimized
for reading StringView
and BinaryView
and such queries are significantly faster.
If this returns false, the parquet reader will read the columns according to the
defaults or any embedded Arrow type information. This may result in reading
StringArrays
and then casting to StringViewArray
which is less efficient.
Sourcepub fn with_force_view_types(self, use_views: bool) -> Self
pub fn with_force_view_types(self, use_views: bool) -> Self
If true, will use view types. See Self::force_view_types
for details
Sourcepub fn binary_as_string(&self) -> bool
pub fn binary_as_string(&self) -> bool
Return true
if binary types will be read as strings.
If this returns true, DataFusion will instruct the parquet reader
to read binary columns such as Binary
or BinaryView
as the
corresponding string type such as Utf8
or LargeUtf8
.
The parquet reader has special optimizations for Utf8
and LargeUtf8
validation, and such queries are significantly faster than reading
binary columns and then casting to string columns.
Sourcepub fn with_binary_as_string(self, binary_as_string: bool) -> Self
pub fn with_binary_as_string(self, binary_as_string: bool) -> Self
If true, will read binary types as strings. See Self::binary_as_string
for details
pub fn coerce_int96(&self) -> Option<String>
pub fn with_coerce_int96(self, time_unit: Option<String>) -> Self
Trait Implementations§
Source§impl Debug for ParquetFormat
impl Debug for ParquetFormat
Source§impl Default for ParquetFormat
impl Default for ParquetFormat
Source§fn default() -> ParquetFormat
fn default() -> ParquetFormat
Source§impl FileFormat for ParquetFormat
impl FileFormat for ParquetFormat
Source§fn as_any(&self) -> &dyn Any
fn as_any(&self) -> &dyn Any
Returns self as Any
so that it can be
downcast to a specific implementation.
Source§fn get_ext_with_compression(
&self,
file_compression_type: &FileCompressionType,
) -> Result<String>
fn get_ext_with_compression( &self, file_compression_type: &FileCompressionType, ) -> Result<String>
Source§fn compression_type(&self) -> Option<FileCompressionType>
fn compression_type(&self) -> Option<FileCompressionType>
Source§fn infer_schema<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
state: &'life1 dyn Session,
store: &'life2 Arc<dyn ObjectStore>,
objects: &'life3 [ObjectMeta],
) -> Pin<Box<dyn Future<Output = Result<SchemaRef>> + Send + 'async_trait>> where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
fn infer_schema<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
state: &'life1 dyn Session,
store: &'life2 Arc<dyn ObjectStore>,
objects: &'life3 [ObjectMeta],
) -> Pin<Box<dyn Future<Output = Result<SchemaRef>> + Send + 'async_trait>> where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
Source§fn infer_stats<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
_state: &'life1 dyn Session,
store: &'life2 Arc<dyn ObjectStore>,
table_schema: SchemaRef,
object: &'life3 ObjectMeta,
) -> Pin<Box<dyn Future<Output = Result<Statistics>> + Send + 'async_trait>> where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
fn infer_stats<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
_state: &'life1 dyn Session,
store: &'life2 Arc<dyn ObjectStore>,
table_schema: SchemaRef,
object: &'life3 ObjectMeta,
) -> Pin<Box<dyn Future<Output = Result<Statistics>> + Send + 'async_trait>> where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
Source§fn create_physical_plan<'life0, 'life1, 'async_trait>(
&'life0 self,
_state: &'life1 dyn Session,
conf: FileScanConfig,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>> where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn create_physical_plan<'life0, 'life1, 'async_trait>(
&'life0 self,
_state: &'life1 dyn Session,
conf: FileScanConfig,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>> where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Source§fn create_writer_physical_plan<'life0, 'life1, 'async_trait>(
&'life0 self,
input: Arc<dyn ExecutionPlan>,
_state: &'life1 dyn Session,
conf: FileSinkConfig,
order_requirements: Option<LexRequirement>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>> where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn create_writer_physical_plan<'life0, 'life1, 'async_trait>(
&'life0 self,
input: Arc<dyn ExecutionPlan>,
_state: &'life1 dyn Session,
conf: FileSinkConfig,
order_requirements: Option<LexRequirement>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>> where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Source§fn file_source(&self) -> Arc<dyn FileSource>
fn file_source(&self) -> Arc<dyn FileSource>
Returns the file source associated with this format, such as CsvSource
, JsonSource
, etc.
Auto Trait Implementations§
impl Freeze for ParquetFormat
impl RefUnwindSafe for ParquetFormat
impl Send for ParquetFormat
impl Sync for ParquetFormat
impl Unpin for ParquetFormat
impl UnwindSafe for ParquetFormat
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self
into a Left
variant of Either<Self, Self>
if into_left
is true
.
Converts self
into a Right
variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self
into a Left
variant of Either<Self, Self>
if into_left(&self)
returns true
.
Converts self
into a Right
variant of Either<Self, Self>
otherwise. Read more