Trait FileSource

Source
pub trait FileSource: Send + Sync {
    // Required methods
    fn create_file_opener(
        &self,
        object_store: Arc<dyn ObjectStore>,
        base_config: &FileScanConfig,
        partition: usize,
    ) -> Arc<dyn FileOpener>;
    fn as_any(&self) -> &(dyn Any + 'static);
    fn with_batch_size(&self, batch_size: usize) -> Arc<dyn FileSource>;
    fn with_schema(&self, schema: Arc<Schema>) -> Arc<dyn FileSource>;
    fn with_projection(&self, config: &FileScanConfig) -> Arc<dyn FileSource>;
    fn with_statistics(&self, statistics: Statistics) -> Arc<dyn FileSource>;
    fn metrics(&self) -> &ExecutionPlanMetricsSet;
    fn statistics(&self) -> Result<Statistics, DataFusionError>;
    fn file_type(&self) -> &str;

    // Provided methods
    fn fmt_extra(
        &self,
        _t: DisplayFormatType,
        _f: &mut Formatter<'_>,
    ) -> Result<(), Error> { ... }
    fn repartitioned(
        &self,
        target_partitions: usize,
        repartition_file_min_size: usize,
        output_ordering: Option<LexOrdering>,
        config: &FileScanConfig,
    ) -> Result<Option<FileScanConfig>, DataFusionError> { ... }
}
Expand description

Common behaviors that each file format needs to implement.

See example implementations such as ParquetSource and CsvSource.

Required Methods§

Source

fn create_file_opener( &self, object_store: Arc<dyn ObjectStore>, base_config: &FileScanConfig, partition: usize, ) -> Arc<dyn FileOpener>

Creates a dyn FileOpener based on given parameters

Source

fn as_any(&self) -> &(dyn Any + 'static)

Returns this source as Any so that it can be downcast to its concrete type

Source

fn with_batch_size(&self, batch_size: usize) -> Arc<dyn FileSource>

Initialize new instance with batch size configuration

Source

fn with_schema(&self, schema: Arc<Schema>) -> Arc<dyn FileSource>

Initialize new instance with a new schema

Source

fn with_projection(&self, config: &FileScanConfig) -> Arc<dyn FileSource>

Initialize new instance with projection information

Source

fn with_statistics(&self, statistics: Statistics) -> Arc<dyn FileSource>

Initialize new instance with projected statistics

Source

fn metrics(&self) -> &ExecutionPlanMetricsSet

Return execution plan metrics

Source

fn statistics(&self) -> Result<Statistics, DataFusionError>

Return projected statistics

Source

fn file_type(&self) -> &str

String representation of file source such as “csv”, “json”, “parquet”

Provided Methods§

Source

fn fmt_extra( &self, _t: DisplayFormatType, _f: &mut Formatter<'_>, ) -> Result<(), Error>

Format FileType-specific information

Source

fn repartitioned( &self, target_partitions: usize, repartition_file_min_size: usize, output_ordering: Option<LexOrdering>, config: &FileScanConfig, ) -> Result<Option<FileScanConfig>, DataFusionError>

If supported by the FileSource, redistribute files across partitions according to their size. Allows custom file formats to implement their own repartitioning logic.

Provides a default repartitioning behavior, see comments on FileGroupPartitioner for more detail.

Implementors§