/// Abstraction over file-format-specific behaviour (schema inference,
/// statistics inference, and scan/writer plan creation).
///
/// Implementors must be `Send + Sync + Debug`. The asynchronous methods are
/// shown in their desugared form: each returns a pinned, boxed, `Send` future
/// bounded by the `'async_trait` lifetime, which every borrowed argument must
/// outlive (presumably the expansion of `#[async_trait]` — confirm against the
/// original source).
pub trait FileFormat: Send + Sync + Debug {
    // Required methods

    /// Returns `self` as `&dyn Any` so callers can downcast to a specific
    /// implementation.
    fn as_any(&self) -> &dyn Any;

    /// Infers the common schema of the provided `objects`, read from `store`.
    ///
    /// Resolves to a `SchemaRef` on success; the accompanying documentation
    /// notes this may fail if the files have schemas that cannot be merged.
    fn infer_schema<'life0, 'life1, 'life2, 'life3, 'async_trait>(
        &'life0 self,
        state: &'life1 SessionState,
        store: &'life2 Arc<dyn ObjectStore>,
        objects: &'life3 [ObjectMeta]
    ) -> Pin<Box<dyn Future<Output = Result<SchemaRef>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait,
             'life2: 'async_trait,
             'life3: 'async_trait;

    /// Infers statistics for a single `object` read from `store`.
    ///
    /// `table_schema` is the (combined) schema of the overall table and may be
    /// a superset of the schema contained in this particular file.
    fn infer_stats<'life0, 'life1, 'life2, 'life3, 'async_trait>(
        &'life0 self,
        state: &'life1 SessionState,
        store: &'life2 Arc<dyn ObjectStore>,
        table_schema: SchemaRef,
        object: &'life3 ObjectMeta
    ) -> Pin<Box<dyn Future<Output = Result<Statistics>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait,
             'life2: 'async_trait,
             'life3: 'async_trait;

    /// Converts the scan configuration `conf` into the execution plan
    /// appropriate for this file format.
    ///
    /// `filters` is an optional physical expression; how an implementation
    /// uses it is not specified here.
    fn create_physical_plan<'life0, 'life1, 'life2, 'async_trait>(
        &'life0 self,
        state: &'life1 SessionState,
        conf: FileScanConfig,
        filters: Option<&'life2 Arc<dyn PhysicalExpr>>
    ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait,
             'life2: 'async_trait;

    /// Returns the `FileType` corresponding to this file format.
    fn file_type(&self) -> FileType;

    // Provided method

    /// Builds the writer execution plan for this file format from an input
    /// plan and a sink configuration.
    ///
    /// This method has a default body (elided in this listing), so
    /// implementors are not required to override it. The leading underscores
    /// on the parameter names suggest the default does not use them —
    /// NOTE(review): confirm against the actual default implementation.
    fn create_writer_physical_plan<'life0, 'life1, 'async_trait>(
        &'life0 self,
        _input: Arc<dyn ExecutionPlan>,
        _state: &'life1 SessionState,
        _conf: FileSinkConfig,
        _order_requirements: Option<Vec<PhysicalSortRequirement>>
    ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait { ... }
}
Expand description

This trait abstracts all the file-format-specific implementations away from the TableProvider. This enables code reuse across providers that support the same file formats.

Required Methods§

source

fn as_any(&self) -> &dyn Any

Returns this file format as Any so that it can be downcast to a specific implementation.

source

fn infer_schema<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, state: &'life1 SessionState, store: &'life2 Arc<dyn ObjectStore>, objects: &'life3 [ObjectMeta] ) -> Pin<Box<dyn Future<Output = Result<SchemaRef>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Infer the common schema of the provided objects. The objects will usually be analysed up to a given number of records or files (as specified in the format config), and the estimated common schema is then returned. This might fail if the files have schemas that cannot be merged.

source

fn infer_stats<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, state: &'life1 SessionState, store: &'life2 Arc<dyn ObjectStore>, table_schema: SchemaRef, object: &'life3 ObjectMeta ) -> Pin<Box<dyn Future<Output = Result<Statistics>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Infer the statistics for the provided object. The cost and accuracy of the estimated statistics might vary greatly between file formats.

table_schema is the (combined) schema of the overall table and may be a superset of the schema contained in this file.

TODO: should the file source return statistics for only columns referred to in the table schema?

source

fn create_physical_plan<'life0, 'life1, 'life2, 'async_trait>( &'life0 self, state: &'life1 SessionState, conf: FileScanConfig, filters: Option<&'life2 Arc<dyn PhysicalExpr>> ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait,

Take a list of files and convert it to the appropriate executor according to this file format.

source

fn file_type(&self) -> FileType

Returns the FileType corresponding to this FileFormat.

Provided Methods§

source

fn create_writer_physical_plan<'life0, 'life1, 'async_trait>( &'life0 self, _input: Arc<dyn ExecutionPlan>, _state: &'life1 SessionState, _conf: FileSinkConfig, _order_requirements: Option<Vec<PhysicalSortRequirement>> ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Take a list of files and the configuration, and convert them to the appropriate writer executor according to this file format.

Implementors§