// Trait describing a file-format-specific source used when scanning files.
// Implementors must be `Send + Sync`. Most methods hand back an
// `Arc<dyn FileSource>`, i.e. they return a (re)configured source rather than
// mutating `self` (all receivers are `&self`).
pub trait FileSource: Send + Sync {
// Required methods
// Creates a `dyn FileOpener` from the given `object_store`, `base_config`
// and `partition`.
fn create_file_opener(
&self,
object_store: Arc<dyn ObjectStore>,
base_config: &FileScanConfig,
partition: usize,
) -> Arc<dyn FileOpener>;
// Returns `self` as a `dyn Any` reference.
// NOTE(review): presumably so callers can downcast to the concrete source
// type — confirm against call sites.
fn as_any(&self) -> &(dyn Any + 'static);
// Returns a source configured with the given batch size.
fn with_batch_size(&self, batch_size: usize) -> Arc<dyn FileSource>;
// Returns a source configured with the given schema.
fn with_schema(&self, schema: Arc<Schema>) -> Arc<dyn FileSource>;
// Returns a source configured with projection information taken from `config`.
fn with_projection(&self, config: &FileScanConfig) -> Arc<dyn FileSource>;
// Returns a source configured with the given (projected) statistics.
fn with_statistics(&self, statistics: Statistics) -> Arc<dyn FileSource>;
// Returns the execution plan metrics of this source.
fn metrics(&self) -> &ExecutionPlanMetricsSet;
// Returns the projected statistics, or a `DataFusionError` on failure.
fn statistics(&self) -> Result<Statistics, DataFusionError>;
// Returns a string naming the file type.
// NOTE(review): the exact values (e.g. casing, extension vs. format name)
// are not visible here — check the implementors.
fn file_type(&self) -> &str;
// Provided methods
// Formats file-type-specific information into `_f`. A default body is
// provided (elided in this listing), so implementors may override it.
// NOTE(review): the underscore-prefixed parameters suggest the default
// ignores them — confirm in the actual source.
fn fmt_extra(
&self,
_t: DisplayFormatType,
_f: &mut Formatter<'_>,
) -> Result<(), Error> { ... }
// If supported by this source, redistributes files across partitions
// according to their size. A default body is provided (elided in this
// listing); custom file formats can override it with their own logic.
// NOTE(review): presumably `Ok(None)` means no repartitioning was applied —
// confirm against the default implementation.
fn repartitioned(
&self,
target_partitions: usize,
repartition_file_min_size: usize,
output_ordering: Option<LexOrdering>,
config: &FileScanConfig,
) -> Result<Option<FileScanConfig>, DataFusionError> { ... }
}
Expand description
Common behaviors that every file format needs to implement.
See implementation examples such as ParquetSource and CsvSource.
Required Methods§
Sourcefn create_file_opener(
&self,
object_store: Arc<dyn ObjectStore>,
base_config: &FileScanConfig,
partition: usize,
) -> Arc<dyn FileOpener>
fn create_file_opener( &self, object_store: Arc<dyn ObjectStore>, base_config: &FileScanConfig, partition: usize, ) -> Arc<dyn FileOpener>
Creates a dyn FileOpener based on the given parameters.
Sourcefn with_batch_size(&self, batch_size: usize) -> Arc<dyn FileSource>
fn with_batch_size(&self, batch_size: usize) -> Arc<dyn FileSource>
Initialize a new instance with the batch size configuration
Sourcefn with_schema(&self, schema: Arc<Schema>) -> Arc<dyn FileSource>
fn with_schema(&self, schema: Arc<Schema>) -> Arc<dyn FileSource>
Initialize new instance with a new schema
Sourcefn with_projection(&self, config: &FileScanConfig) -> Arc<dyn FileSource>
fn with_projection(&self, config: &FileScanConfig) -> Arc<dyn FileSource>
Initialize new instance with projection information
Sourcefn with_statistics(&self, statistics: Statistics) -> Arc<dyn FileSource>
fn with_statistics(&self, statistics: Statistics) -> Arc<dyn FileSource>
Initialize new instance with projected statistics
Sourcefn metrics(&self) -> &ExecutionPlanMetricsSet
fn metrics(&self) -> &ExecutionPlanMetricsSet
Return execution plan metrics
Sourcefn statistics(&self) -> Result<Statistics, DataFusionError>
fn statistics(&self) -> Result<Statistics, DataFusionError>
Return projected statistics
Provided Methods§
Sourcefn fmt_extra(
&self,
_t: DisplayFormatType,
_f: &mut Formatter<'_>,
) -> Result<(), Error>
fn fmt_extra( &self, _t: DisplayFormatType, _f: &mut Formatter<'_>, ) -> Result<(), Error>
Format FileType-specific information
Sourcefn repartitioned(
&self,
target_partitions: usize,
repartition_file_min_size: usize,
output_ordering: Option<LexOrdering>,
config: &FileScanConfig,
) -> Result<Option<FileScanConfig>, DataFusionError>
fn repartitioned( &self, target_partitions: usize, repartition_file_min_size: usize, output_ordering: Option<LexOrdering>, config: &FileScanConfig, ) -> Result<Option<FileScanConfig>, DataFusionError>
If supported by the FileSource, redistribute files across partitions according to their size.
Allows custom file formats to implement their own repartitioning logic.
Provides a default repartitioning behavior; see the comments on FileGroupPartitioner for more detail.