Trait DataSource

Source
pub trait DataSource:
    Send
    + Sync
    + Debug {
    // Required methods
    fn open(
        &self,
        partition: usize,
        context: Arc<TaskContext>,
    ) -> Result<SendableRecordBatchStream>;
    fn as_any(&self) -> &dyn Any;
    fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter<'_>) -> Result;
    fn output_partitioning(&self) -> Partitioning;
    fn eq_properties(&self) -> EquivalenceProperties;
    fn statistics(&self) -> Result<Statistics>;
    fn with_fetch(&self, _limit: Option<usize>) -> Option<Arc<dyn DataSource>>;
    fn fetch(&self) -> Option<usize>;
    fn try_swapping_with_projection(
        &self,
        _projection: &ProjectionExec,
    ) -> Result<Option<Arc<dyn ExecutionPlan>>>;

    // Provided methods
    fn repartitioned(
        &self,
        _target_partitions: usize,
        _repartition_file_min_size: usize,
        _output_ordering: Option<LexOrdering>,
    ) -> Result<Option<Arc<dyn DataSource>>> { ... }
    fn metrics(&self) -> ExecutionPlanMetricsSet { ... }
    fn try_pushdown_filters(
        &self,
        filters: Vec<Arc<dyn PhysicalExpr>>,
        _config: &ConfigOptions,
    ) -> Result<FilterPushdownPropagation<Arc<dyn DataSource>>> { ... }
}
Expand description

A source of data, typically a list of files or memory

This trait provides common behaviors for abstract sources of data. It has two common implementations:

  1. FileScanConfig: lists of files
  2. MemorySourceConfig: an in-memory list of RecordBatches

File-format-specific behaviors are defined by FileSource.

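§See Also

  * FileSource: defines the file-format-specific behaviors
  * DataSourceExec: related to DataSource as shown in the diagram under Notes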

§Notes

Implementations must also implement Debug, which assists debugging.

The following diagram shows how DataSource, FileSource, and DataSourceExec are related:

                      ┌─────────────────────┐                              -----► execute path
                      │                     │                              ┄┄┄┄┄► init path
                      │   DataSourceExec    │  
                      │                     │    
                      └───────▲─────────────┘
                              ┊  │
                              ┊  │
                      ┌──────────▼──────────┐                            ┌─────────────────────┐
                      │                     │                            │                     │
                      │  DataSource(trait)  │                            │ TableProvider(trait)│
                      │                     │                            │                     │
                      └───────▲─────────────┘                            └─────────────────────┘
                              ┊  │                                                  ┊
              ┌───────────────┿──┴────────────────┐                                 ┊
              |   ┌┄┄┄┄┄┄┄┄┄┄┄┘                   |                                 ┊
              |   ┊                               |                                 ┊
   ┌──────────▼──────────┐             ┌──────────▼──────────┐                      ┊
   │                     │             │                     │           ┌──────────▼──────────┐
   │   FileScanConfig    │             │ MemorySourceConfig  │           |                     |
   │                     │             │                     │           |  FileFormat(trait)  |
   └──────────────▲──────┘             └─────────────────────┘           |                     |
              │   ┊                                                      └─────────────────────┘
              │   ┊                                                                 ┊
              │   ┊                                                                 ┊
   ┌──────────▼──────────┐                                               ┌──────────▼──────────┐
   │                     │                                               │     ArrowSource     │
   │ FileSource(trait)   ◄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄│          ...        │
   │                     │                                               │    ParquetSource    │
   └─────────────────────┘                                               └─────────────────────┘
              │
              │
              │
              │
   ┌──────────▼──────────┐
   │     ArrowSource     │
   │          ...        │
   │    ParquetSource    │
   └─────────────────────┘
              │
FileOpener (called by FileStream)
              │
   ┌──────────▼──────────┐
   │                     │
   │     RecordBatch     │
   │                     │
   └─────────────────────┘

Required Methods§

Source

fn open( &self, partition: usize, context: Arc<TaskContext>, ) -> Result<SendableRecordBatchStream>

Source

fn as_any(&self) -> &dyn Any

Source

fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter<'_>) -> Result

Format this source for display in explain plans

Source

fn output_partitioning(&self) -> Partitioning

Source

fn eq_properties(&self) -> EquivalenceProperties

Source

fn statistics(&self) -> Result<Statistics>

Source

fn with_fetch(&self, _limit: Option<usize>) -> Option<Arc<dyn DataSource>>

Return a copy of this DataSource with a new fetch limit

Source

fn fetch(&self) -> Option<usize>

Source

fn try_swapping_with_projection( &self, _projection: &ProjectionExec, ) -> Result<Option<Arc<dyn ExecutionPlan>>>

Provided Methods§

Source

fn repartitioned( &self, _target_partitions: usize, _repartition_file_min_size: usize, _output_ordering: Option<LexOrdering>, ) -> Result<Option<Arc<dyn DataSource>>>

Return a copy of this DataSource with a new partitioning scheme.

Returns Ok(None) (the default) if the partitioning cannot be changed. Refer to ExecutionPlan::repartitioned for details on when None should be returned.

Repartitioning should not change the output ordering, if one exists. Refer to MemorySourceConfig::repartition_preserving_order and FileSource’s FileGroupPartitioner::repartition_file_groups for examples.

Source

fn metrics(&self) -> ExecutionPlanMetricsSet

Source

fn try_pushdown_filters( &self, filters: Vec<Arc<dyn PhysicalExpr>>, _config: &ConfigOptions, ) -> Result<FilterPushdownPropagation<Arc<dyn DataSource>>>

Try to push down filters into this DataSource. See ExecutionPlan::handle_child_pushdown_result for more details.

Implementors§