Struct FileScanConfigBuilder

Source
pub struct FileScanConfigBuilder { /* private fields */ }
Expand description

A builder for FileScanConfig instances.

Example:


    // Create a schema for our Parquet files
    let schema = Arc::new(Schema::new(vec![
        Field::new("id", DataType::Int32, false),
        Field::new("value", DataType::Utf8, false),
    ]));

    // Create a builder for scanning Parquet files from a local filesystem.
    // `file_source` is an `Arc<dyn FileSource>` (e.g. a ParquetSource) created beforehand.
    let config = FileScanConfigBuilder::new(
        ObjectStoreUrl::local_filesystem(),
        schema,
        file_source,
    )
    // Set a limit of 1000 rows
    .with_limit(Some(1000))
    // Project only the first column
    .with_projection(Some(vec![0]))
    // Add partition columns
    .with_table_partition_cols(vec![
        Field::new("date", DataType::Utf8, false),
    ])
    // Add a file group with two files
    .with_file_group(FileGroup::new(vec![
        PartitionedFile::new("data/date=2024-01-01/file1.parquet", 1024),
        PartitionedFile::new("data/date=2024-01-01/file2.parquet", 2048),
    ]))
    // Set compression type
    .with_file_compression_type(FileCompressionType::UNCOMPRESSED)
    // Build the final config
    .build();

Implementations§

Source§

impl FileScanConfigBuilder

Source

pub fn new( object_store_url: ObjectStoreUrl, file_schema: SchemaRef, file_source: Arc<dyn FileSource>, ) -> Self

Create a new FileScanConfigBuilder with default settings for scanning files.

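§Parameters:

    object_store_url: URL of the object store to read the files from (e.g. ObjectStoreUrl::local_filesystem()).
    file_schema: Schema of the files being scanned; partition columns are supplied separately via with_table_partition_cols.
    file_source: The file source implementation (e.g. ParquetSource, CsvSource) used to scan the files.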
Source

pub fn with_limit(self, limit: Option<usize>) -> Self

Set the maximum number of records to read from this plan. If None, all records after filtering are returned.

Source

pub fn with_source(self, file_source: Arc<dyn FileSource>) -> Self

Set the file source for scanning files.

This method allows you to change the file source implementation (e.g. ParquetSource, CsvSource, etc.) after the builder has been created.

Source

pub fn with_projection(self, projection: Option<Vec<usize>>) -> Self

Set the columns on which to project the data. Indexes are zero-based: indexes greater than or equal to the number of columns of file_schema refer to table_partition_cols.

Source

pub fn with_table_partition_cols(self, table_partition_cols: Vec<Field>) -> Self

Set the partitioning columns

Source

pub fn with_constraints(self, constraints: Constraints) -> Self

Set the table constraints

Source

pub fn with_statistics(self, statistics: Statistics) -> Self

Set the estimated overall statistics of the files, taking filters into account. Defaults to Statistics::new_unknown.

Source

pub fn with_file_groups(self, file_groups: Vec<FileGroup>) -> Self

Set the list of files to be processed, grouped into partitions.

Each file must have a schema of file_schema or a subset. If a particular file has a subset, the missing columns are padded with NULLs.

DataFusion may attempt to read each partition of files concurrently, however files within a partition will be read sequentially, one after the next.

Source

pub fn with_file_group(self, file_group: FileGroup) -> Self

Add a new file group

See Self::with_file_groups for more information.

Source

pub fn with_file(self, file: PartitionedFile) -> Self

Add a file as a single group

See Self::with_file_groups for more information.

Source

pub fn with_output_ordering(self, output_ordering: Vec<LexOrdering>) -> Self

Set the output ordering of the files

Source

pub fn with_file_compression_type( self, file_compression_type: FileCompressionType, ) -> Self

Set the file compression type

Source

pub fn with_newlines_in_values(self, new_lines_in_values: bool) -> Self

Set whether newlines in values are supported for CSVOptions.

Parsing newlines in quoted values may be affected by execution behaviour such as parallel file scanning. Setting this to true ensures that newlines in values are parsed successfully, which may reduce performance.

Source

pub fn with_batch_size(self, batch_size: Option<usize>) -> Self

Set the batch_size property

Source

pub fn build(self) -> FileScanConfig

Build the final FileScanConfig with all the configured settings.

This method takes ownership of the builder and returns the constructed FileScanConfig. Any unset optional fields will use their default values.

Trait Implementations§

Source§

impl Clone for FileScanConfigBuilder

Source§

fn clone(&self) -> FileScanConfigBuilder

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl From<FileScanConfig> for FileScanConfigBuilder

Source§

fn from(config: FileScanConfig) -> Self

Converts to this type from the input type.

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> ErasedDestructor for T
where T: 'static,