pub struct ListingTable { /* private fields */ }
Expand description

Reads data from one or more files via an ObjectStore. For example, from local files or objects from AWS S3. Implements TableProvider, a DataFusion data source.

Features

  1. Merges schemas if the files have compatible but not identical schemas

  2. Hive-style partitioning support, where a path such as /files/date=1/1/2022/data.parquet is injected as a date column.

  3. Projection pushdown for formats that support it, such as Parquet

Example

Here is an example of reading a directory of parquet files using a ListingTable:

let ctx = SessionContext::new();
let session_state = ctx.state();
let table_path = "/path/to/parquet";

// Parse the path
let table_path = ListingTableUrl::parse(table_path)?;

// Create default parquet options
let file_format = ParquetFormat::new();
let listing_options = ListingOptions::new(Arc::new(file_format))
  .with_file_extension(".parquet");

// Resolve the schema
let resolved_schema = listing_options
   .infer_schema(&session_state, &table_path)
   .await?;

let config = ListingTableConfig::new(table_path)
  .with_listing_options(listing_options)
  .with_schema(resolved_schema);

// Create a new TableProvider
let provider = Arc::new(ListingTable::try_new(config)?);

// This provider can now be read as a dataframe:
let df = ctx.read_table(provider.clone());

// or registered as a named table:
ctx.register_table("my_table", provider);

Implementations§

source§

impl ListingTable

source

pub fn try_new(config: ListingTableConfig) -> Result<Self>

Create a new ListingTable that lists the FS to get the files to scan. See ListingTable for an example.

Takes a ListingTableConfig as input which requires an ObjectStore and table_path. ListingOptions and SchemaRef are optional. If they are not provided the file type is inferred based on the file suffix. If the schema is provided then it must be resolved before creating the table and should contain the fields of the file without the table partitioning columns.

source

pub fn with_definition(self, defintion: Option<String>) -> Self

Specify the SQL definition for this table, if any

source

pub fn table_paths(&self) -> &Vec<ListingTableUrl>

Get paths ref

source

pub fn options(&self) -> &ListingOptions

Get options ref

Trait Implementations§

source§

impl TableProvider for ListingTable

source§

fn as_any(&self) -> &dyn Any

Returns the table provider as Any so that it can be downcast to a specific implementation.
source§

fn schema(&self) -> SchemaRef

Get a reference to the schema for this table
source§

fn table_type(&self) -> TableType

Get the type of this table for metadata/catalog purposes.
source§

fn scan<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, state: &'life1 SessionState, projection: Option<&'life2 Vec<usize>>, filters: &'life3 [Expr], limit: Option<usize> ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Create an ExecutionPlan that will scan the table. The table provider is usually responsible for grouping the source data into partitions that can be efficiently parallelized or distributed.
source§

fn supports_filter_pushdown( &self, filter: &Expr ) -> Result<TableProviderFilterPushDown>

👎Deprecated since 20.0.0: use supports_filters_pushdown instead
Tests whether the table provider can make use of a filter expression to optimise data retrieval.
source§

fn get_table_definition(&self) -> Option<&str>

Get the create statement used to create this table, if available.
source§

fn get_logical_plan(&self) -> Option<&LogicalPlan>

Get the Logical Plan of this table, if available.
source§

fn supports_filters_pushdown( &self, filters: &[&Expr] ) -> Result<Vec<TableProviderFilterPushDown>>

Tests whether the table provider can make use of any or all filter expressions to optimise data retrieval.
source§

fn statistics(&self) -> Option<Statistics>

Get statistics for this table, if available
source§

fn insert_into<'life0, 'life1, 'async_trait>( &'life0 self, _state: &'life1 SessionState, _input: Arc<dyn ExecutionPlan> ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>> where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Insert into this table

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T> Instrument for T

source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
source§

impl<T, U> Into<U> for T where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> Same<T> for T

§

type Output = T

Should always be Self
source§

impl<T, U> TryFrom<U> for T where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<V, T> VZip<V> for T where V: MultiLane<T>,

§

fn vzip(self) -> V

source§

impl<T> WithSubscriber for T

source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more