Struct datafusion::datasource::physical_plan::FileScanConfig

source ·

pub struct FileScanConfig {
    pub object_store_url: ObjectStoreUrl,
    pub file_schema: SchemaRef,
    pub file_groups: Vec<Vec<PartitionedFile>>,
    pub statistics: Statistics,
    pub projection: Option<Vec<usize>>,
    pub limit: Option<usize>,
    pub table_partition_cols: Vec<(String, DataType)>,
    pub output_ordering: Vec<LexOrdering>,
    pub infinite_source: bool,
}

Expand description

The base configurations to provide when creating a physical plan for any given file format.

Fields§

§object_store_url: ObjectStoreUrl

Object store URL, used to get an ObjectStore instance from RuntimeEnv::object_store

§file_schema: SchemaRef

Schema before projection is applied. It contains all the columns that may appear in the files. It does not include table partition columns that may be added.

§file_groups: Vec<Vec<PartitionedFile>>

List of files to be processed, grouped into partitions

Each file must have a schema of file_schema or a subset. If a particular file has a subset, the missing columns are padded with NULLs.

DataFusion may attempt to read each partition of files concurrently; however, files within a partition will be read sequentially, one after the next.

§statistics: Statistics

Estimated overall statistics of the files, taking filters into account.

§projection: Option<Vec<usize>>

Columns on which to project the data. Indexes greater than or equal to the number of columns of file_schema refer to table_partition_cols.

§limit: Option<usize>

The maximum number of records to read from this plan. If None, all records after filtering are returned.

§table_partition_cols: Vec<(String, DataType)>

The partitioning columns

§output_ordering: Vec<LexOrdering>

All equivalent lexicographical orderings that describe the schema.

§infinite_source: bool

Indicates whether this plan may produce an infinite stream of records.

Struct datafusion::datasource::physical_plan::FileScanConfig

Fields§

Implementations§

impl FileScanConfig

pub fn project(&self) -> (SchemaRef, Statistics, Vec<LexOrdering>)

pub fn repartition_file_groups(file_groups: Vec<Vec<PartitionedFile>>, target_partitions: usize, repartition_file_min_size: usize) -> Option<Vec<Vec<PartitionedFile>>>

Trait Implementations§

impl Clone for FileScanConfig

fn clone(&self) -> FileScanConfig

fn clone_from(&mut self, source: &Self)

impl Debug for FileScanConfig

fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult

impl DisplayAs for FileScanConfig

fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter<'_>) -> FmtResult

Auto Trait Implementations§

impl !RefUnwindSafe for FileScanConfig

impl Send for FileScanConfig

impl Sync for FileScanConfig

impl Unpin for FileScanConfig

impl !UnwindSafe for FileScanConfig

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> Same<T> for T

type Output = T

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V