Trait datafusion::datasource::file_format::FileFormat
source · pub trait FileFormat: Send + Sync + Debug {
// Required methods
fn as_any(&self) -> &dyn Any;
fn infer_schema<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
state: &'life1 SessionState,
store: &'life2 Arc<dyn ObjectStore>,
objects: &'life3 [ObjectMeta]
) -> Pin<Box<dyn Future<Output = Result<SchemaRef>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait;
fn infer_stats<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
state: &'life1 SessionState,
store: &'life2 Arc<dyn ObjectStore>,
table_schema: SchemaRef,
object: &'life3 ObjectMeta
) -> Pin<Box<dyn Future<Output = Result<Statistics>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait;
fn create_physical_plan<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
state: &'life1 SessionState,
conf: FileScanConfig,
filters: Option<&'life2 Arc<dyn PhysicalExpr>>
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait;
fn file_type(&self) -> FileType;
// Provided method
fn create_writer_physical_plan<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: Arc<dyn ExecutionPlan>,
_state: &'life1 SessionState,
_conf: FileSinkConfig,
_order_requirements: Option<Vec<PhysicalSortRequirement>>
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait { ... }
}
Expand description
This trait abstracts all the file-format-specific implementations
away from the `TableProvider`. This helps code reuse across
providers that support the same file formats.
Required Methods§
source · fn as_any(&self) -> &dyn Any
fn as_any(&self) -> &dyn Any
Returns this file format as `Any` so that it can be downcast to a specific implementation.
source · fn infer_schema<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
state: &'life1 SessionState,
store: &'life2 Arc<dyn ObjectStore>,
objects: &'life3 [ObjectMeta]
) -> Pin<Box<dyn Future<Output = Result<SchemaRef>> + Send + 'async_trait>> where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
fn infer_schema<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, state: &'life1 SessionState, store: &'life2 Arc<dyn ObjectStore>, objects: &'life3 [ObjectMeta] ) -> Pin<Box<dyn Future<Output = Result<SchemaRef>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,
Infer the common schema of the provided objects. The objects are usually analysed up to a given number of records or files (as specified in the format config), and the estimated common schema is then returned. This might fail if the files have schemas that cannot be merged.
source · fn infer_stats<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
state: &'life1 SessionState,
store: &'life2 Arc<dyn ObjectStore>,
table_schema: SchemaRef,
object: &'life3 ObjectMeta
) -> Pin<Box<dyn Future<Output = Result<Statistics>> + Send + 'async_trait>> where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
fn infer_stats<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, state: &'life1 SessionState, store: &'life2 Arc<dyn ObjectStore>, table_schema: SchemaRef, object: &'life3 ObjectMeta ) -> Pin<Box<dyn Future<Output = Result<Statistics>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,
Infer the statistics for the provided object. The cost and accuracy of the estimated statistics might vary greatly between file formats.
`table_schema` is the (combined) schema of the overall table and may be a superset of the schema contained in this file.
TODO: should the file source return statistics for only columns referred to in the table schema?
source · fn create_physical_plan<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
state: &'life1 SessionState,
conf: FileScanConfig,
filters: Option<&'life2 Arc<dyn PhysicalExpr>>
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>> where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
fn create_physical_plan<'life0, 'life1, 'life2, 'async_trait>( &'life0 self, state: &'life1 SessionState, conf: FileScanConfig, filters: Option<&'life2 Arc<dyn PhysicalExpr>> ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait,
Take a list of files and convert it to the appropriate executor according to this file format.
Provided Methods§
source · fn create_writer_physical_plan<'life0, 'life1, 'async_trait>(
&'life0 self,
_input: Arc<dyn ExecutionPlan>,
_state: &'life1 SessionState,
_conf: FileSinkConfig,
_order_requirements: Option<Vec<PhysicalSortRequirement>>
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>> where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn create_writer_physical_plan<'life0, 'life1, 'async_trait>( &'life0 self, _input: Arc<dyn ExecutionPlan>, _state: &'life1 SessionState, _conf: FileSinkConfig, _order_requirements: Option<Vec<PhysicalSortRequirement>> ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,
Take a list of files and the configuration, and convert them to the appropriate writer executor according to this file format.