Trait FileFormat

Source
/// Abstracts the file-format-specific implementations (schema inference,
/// statistics inference, and physical plan creation) away from the
/// `TableProvider`, so that providers supporting the same file formats can
/// share code.
///
/// NOTE(review): the `'lifeN`/`'async_trait` lifetimes and the
/// `Pin<Box<dyn Future<...>>>` return types below are the desugared form of
/// `async fn` methods — presumably produced by the `#[async_trait]` macro.
pub trait FileFormat:
    Send
    + Sync
    + Debug {
    // Required methods
    /// Returns this format as `Any` so it can be downcast to a specific
    /// implementation.
    fn as_any(&self) -> &dyn Any;
    /// Returns the extension for this `FileFormat`, e.g. "file.csv" -> csv.
    fn get_ext(&self) -> String;
    /// Returns the extension for this `FileFormat` when compressed,
    /// e.g. "file.csv.gz" -> csv.
    fn get_ext_with_compression(
        &self,
        _file_compression_type: &FileCompressionType,
    ) -> Result<String>;
    /// Infers the common schema of the provided `objects` (async).
    ///
    /// The objects are usually analysed up to a given number of records or
    /// files (as specified in the format config). May fail if the files have
    /// schemas that cannot be merged.
    fn infer_schema<'life0, 'life1, 'life2, 'life3, 'async_trait>(
        &'life0 self,
        state: &'life1 dyn Session,
        store: &'life2 Arc<dyn ObjectStore>,
        objects: &'life3 [ObjectMeta],
    ) -> Pin<Box<dyn Future<Output = Result<SchemaRef>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait,
             'life2: 'async_trait,
             'life3: 'async_trait;
    /// Infers statistics for the provided `object` (async). Cost and accuracy
    /// of the estimate vary greatly between file formats.
    ///
    /// `table_schema` is the (combined) schema of the overall table and may be
    /// a superset of the schema contained in this file.
    fn infer_stats<'life0, 'life1, 'life2, 'life3, 'async_trait>(
        &'life0 self,
        state: &'life1 dyn Session,
        store: &'life2 Arc<dyn ObjectStore>,
        table_schema: SchemaRef,
        object: &'life3 ObjectMeta,
    ) -> Pin<Box<dyn Future<Output = Result<Statistics>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait,
             'life2: 'async_trait,
             'life3: 'async_trait;
    /// Converts the files described by `conf` into the appropriate scan
    /// executor for this file format (async). `filters` optionally carries a
    /// physical predicate the format may push down.
    fn create_physical_plan<'life0, 'life1, 'life2, 'async_trait>(
        &'life0 self,
        state: &'life1 dyn Session,
        conf: FileScanConfig,
        filters: Option<&'life2 Arc<dyn PhysicalExpr>>,
    ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait,
             'life2: 'async_trait;
    /// Returns the related `FileSource`, such as `CsvSource` or `JsonSource`.
    fn file_source(&self) -> Arc<dyn FileSource>;

    // Provided methods
    /// Converts `_input` and the sink configuration `_conf` into the
    /// appropriate writer executor for this file format (async).
    /// Default implementation provided; formats without write support can
    /// leave it as-is.
    fn create_writer_physical_plan<'life0, 'life1, 'async_trait>(
        &'life0 self,
        _input: Arc<dyn ExecutionPlan>,
        _state: &'life1 dyn Session,
        _conf: FileSinkConfig,
        _order_requirements: Option<LexRequirement>,
    ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait { ... }
    /// Checks whether this format can push down the provided `_filters` within
    /// the given file/table schemas. Added initially to support the Parquet
    /// format's pushdown ability.
    fn supports_filters_pushdown(
        &self,
        _file_schema: &Schema,
        _table_schema: &Schema,
        _filters: &[&Expr],
    ) -> Result<FilePushdownSupport> { ... }
}
Expand description

This trait abstracts all the file-format-specific implementations away from the TableProvider, enabling code reuse across providers that support the same file formats.

Required Methods§

Source

fn as_any(&self) -> &dyn Any

Returns the table provider as Any so that it can be downcast to a specific implementation.

Source

fn get_ext(&self) -> String

Returns the extension for this FileFormat, e.g. “file.csv” -> csv

Source

fn get_ext_with_compression( &self, _file_compression_type: &FileCompressionType, ) -> Result<String>

Returns the extension for this FileFormat when compressed, e.g. “file.csv.gz” -> csv

Source

fn infer_schema<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, state: &'life1 dyn Session, store: &'life2 Arc<dyn ObjectStore>, objects: &'life3 [ObjectMeta], ) -> Pin<Box<dyn Future<Output = Result<SchemaRef>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Infer the common schema of the provided objects. The objects will usually be analysed up to a given number of records or files (as specified in the format config), and the estimated common schema is then returned. This might fail if the files have schemas that cannot be merged.

Source

fn infer_stats<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, state: &'life1 dyn Session, store: &'life2 Arc<dyn ObjectStore>, table_schema: SchemaRef, object: &'life3 ObjectMeta, ) -> Pin<Box<dyn Future<Output = Result<Statistics>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Infer the statistics for the provided object. The cost and accuracy of the estimated statistics might vary greatly between file formats.

table_schema is the (combined) schema of the overall table and may be a superset of the schema contained in this file.

TODO: should the file source return statistics for only columns referred to in the table schema?

Source

fn create_physical_plan<'life0, 'life1, 'life2, 'async_trait>( &'life0 self, state: &'life1 dyn Session, conf: FileScanConfig, filters: Option<&'life2 Arc<dyn PhysicalExpr>>, ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait,

Take a list of files and convert it to the appropriate executor according to this file format.

Source

fn file_source(&self) -> Arc<dyn FileSource>

Return the related FileSource such as CsvSource, JsonSource, etc.

Provided Methods§

Source

fn create_writer_physical_plan<'life0, 'life1, 'async_trait>( &'life0 self, _input: Arc<dyn ExecutionPlan>, _state: &'life1 dyn Session, _conf: FileSinkConfig, _order_requirements: Option<LexRequirement>, ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Take an input execution plan and the sink configuration and convert them to the appropriate writer executor according to this file format.

Source

fn supports_filters_pushdown( &self, _file_schema: &Schema, _table_schema: &Schema, _filters: &[&Expr], ) -> Result<FilePushdownSupport>

Check if the specified file format has support for pushing down the provided filters within the given schemas. Added initially to support the Parquet file format’s ability to do this.

Implementors§