/// Options for creating a `ListingTable`.
pub struct ListingOptions {
    /// A suffix on which files should be filtered (leave empty to keep all
    /// files on the path).
    pub file_extension: String,
    /// The file format.
    pub format: Arc<dyn FileFormat>,
    /// The expected partition column names in the folder structure.
    /// See `Self::with_table_partition_cols` for details.
    pub table_partition_cols: Vec<(String, DataType)>,
    /// Set true to try to guess statistics from the files. This can add a lot
    /// of overhead as it will usually require files to be opened and at least
    /// partially parsed.
    pub collect_stat: bool,
    /// Group files so that the number of partitions does not exceed this limit.
    pub target_partitions: usize,
    /// Optional pre-known sort order. Must be `SortExpr`s.
    ///
    /// DataFusion may take advantage of this ordering to omit sorts or use
    /// more efficient algorithms.
    pub file_sort_order: Option<Vec<Expr>>,
    /// Infinite source means that the input is not guaranteed to end.
    /// In order to support infinite inputs, DataFusion may adjust query
    /// plans (e.g. joins) to run in full pipelining mode.
    pub infinite_source: bool,
}
Expand description

Options for creating a ListingTable

Fields§

§file_extension: String

A suffix on which files should be filtered (leave empty to keep all files on the path)

§format: Arc<dyn FileFormat>

The file format

§table_partition_cols: Vec<(String, DataType)>

The expected partition column names in the folder structure. See Self::with_table_partition_cols for details

§collect_stat: bool

Set true to try to guess statistics from the files. This can add a lot of overhead as it will usually require files to be opened and at least partially parsed.

§target_partitions: usize

Group files so that the number of partitions does not exceed this limit

§file_sort_order: Option<Vec<Expr>>

Optional pre-known sort order. Must be SortExprs.

DataFusion may take advantage of this ordering to omit sorts or use more efficient algorithms. Currently sortedness must be provided if it is known by some external mechanism, but may in the future be automatically determined, for example using parquet metadata.

See https://github.com/apache/arrow-datafusion/issues/4177

§infinite_source: bool

Infinite source means that the input is not guaranteed to end. Currently, CSV, JSON, and AVRO formats are supported. In order to support infinite inputs, DataFusion may adjust query plans (e.g. joins) to run the given query in full pipelining mode.

Implementations§

source§

impl ListingOptions

source

pub fn new(format: Arc<dyn FileFormat>) -> Self

Creates an options instance with the given format. Default values:

  • no file extension filter
  • no input partition to discover
  • one target partition
  • stat collection
source

pub fn with_infinite_source(self, infinite_source: bool) -> Self

Set unbounded assumption on ListingOptions and returns self.

use std::sync::Arc;
use datafusion::datasource::{listing::ListingOptions, file_format::csv::CsvFormat};
use datafusion::prelude::SessionContext;
let ctx = SessionContext::new();
let listing_options = ListingOptions::new(Arc::new(
    CsvFormat::default()
  )).with_infinite_source(true);

assert_eq!(listing_options.infinite_source, true);
source

pub fn with_file_extension(self, file_extension: impl Into<String>) -> Self

Set file extension on ListingOptions and returns self.


let listing_options = ListingOptions::new(Arc::new(
    ParquetFormat::default()
  ))
  .with_file_extension(".parquet");

assert_eq!(listing_options.file_extension, ".parquet");
source

pub fn with_table_partition_cols( self, table_partition_cols: Vec<(String, DataType)> ) -> Self

Set table partition columns on ListingOptions and returns self.

“partition columns,” used to support Hive Partitioning, are columns added to the data that is read, based on the folder structure where the data resides.

For example, given the following files in your filesystem:

/mnt/nyctaxi/year=2022/month=01/tripdata.parquet
/mnt/nyctaxi/year=2021/month=12/tripdata.parquet
/mnt/nyctaxi/year=2021/month=11/tripdata.parquet

A ListingTable created at /mnt/nyctaxi/ with partition columns “year” and “month” will include new year and month columns while reading the files. The year column would have value 2022 and the month column would have value 01 for the rows read from /mnt/nyctaxi/year=2022/month=01/tripdata.parquet

Notes
  • If only one level (e.g. year in the example above) is specified, the other levels are ignored but the files are still read.

  • Files that don’t follow this partitioning scheme will be ignored.

  • Since the columns have the same value for all rows read from each individual file (such as dates), they are typically dictionary encoded for efficiency. You may use wrap_partition_type_in_dict to request a dictionary-encoded type.

  • The partition columns are solely extracted from the file path. In particular, they are NOT part of the Parquet files themselves.

Example

// listing options for files with paths such as  `/mnt/data/col_a=x/col_b=y/data.parquet`
// `col_a` and `col_b` will be included in the data read from those files
let listing_options = ListingOptions::new(Arc::new(
    ParquetFormat::default()
  ))
  .with_table_partition_cols(vec![("col_a".to_string(), DataType::Utf8),
      ("col_b".to_string(), DataType::Utf8)]);

assert_eq!(listing_options.table_partition_cols, vec![("col_a".to_string(), DataType::Utf8),
    ("col_b".to_string(), DataType::Utf8)]);
source

pub fn with_collect_stat(self, collect_stat: bool) -> Self

Set stat collection on ListingOptions and returns self.


let listing_options = ListingOptions::new(Arc::new(
    ParquetFormat::default()
  ))
  .with_collect_stat(true);

assert_eq!(listing_options.collect_stat, true);
source

pub fn with_target_partitions(self, target_partitions: usize) -> Self

Set number of target partitions on ListingOptions and returns self.


let listing_options = ListingOptions::new(Arc::new(
    ParquetFormat::default()
  ))
  .with_target_partitions(8);

assert_eq!(listing_options.target_partitions, 8);
source

pub fn with_file_sort_order(self, file_sort_order: Option<Vec<Expr>>) -> Self

Set file sort order on ListingOptions and returns self.


 // Tell datafusion that the files are sorted by column "a"
 let file_sort_order = Some(vec![
   col("a").sort(true, true)
 ]);

let listing_options = ListingOptions::new(Arc::new(
    ParquetFormat::default()
  ))
  .with_file_sort_order(file_sort_order.clone());

assert_eq!(listing_options.file_sort_order, file_sort_order);
source

pub async fn infer_schema<'a>( &'a self, state: &SessionState, table_path: &'a ListingTableUrl ) -> Result<SchemaRef>

Infer the schema of the files at the given path on the provided object store. The inferred schema does not include the partitioning columns.

This method will not be called by the table itself but before creating it. This way, when creating the logical plan, we can decide to resolve the schema locally or ask a remote service to do it (e.g. a scheduler).

Trait Implementations§

source§

impl Clone for ListingOptions

source§

fn clone(&self) -> ListingOptions

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Debug for ListingOptions

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T> Instrument for T

source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
source§

impl<T, U> Into<U> for T where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> Same<T> for T

§

type Output = T

Should always be Self
source§

impl<T> ToOwned for T where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<V, T> VZip<V> for T where V: MultiLane<T>,

§

fn vzip(self) -> V

source§

impl<T> WithSubscriber for T

source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more