pub struct FileScanConfigBuilder { /* private fields */ }
Expand description
A builder for FileScanConfigs.
Example:
// Create a schema for our Parquet files
let schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::Int32, false),
Field::new("value", DataType::Utf8, false),
]));
// Create a builder for scanning Parquet files from a local filesystem
let config = FileScanConfigBuilder::new(
ObjectStoreUrl::local_filesystem(),
schema,
file_source,
)
// Set a limit of 1000 rows
.with_limit(Some(1000))
// Project only the first column
.with_projection(Some(vec![0]))
// Add partition columns
.with_table_partition_cols(vec![
Field::new("date", DataType::Utf8, false),
])
// Add a file group with two files
.with_file_group(FileGroup::new(vec![
PartitionedFile::new("data/date=2024-01-01/file1.parquet", 1024),
PartitionedFile::new("data/date=2024-01-01/file2.parquet", 2048),
]))
// Set compression type
.with_file_compression_type(FileCompressionType::UNCOMPRESSED)
// Build the final config
.build();
Implementations§
Source§impl FileScanConfigBuilder
impl FileScanConfigBuilder
Sourcepub fn new(
object_store_url: ObjectStoreUrl,
file_schema: SchemaRef,
file_source: Arc<dyn FileSource>,
) -> Self
pub fn new( object_store_url: ObjectStoreUrl, file_schema: SchemaRef, file_source: Arc<dyn FileSource>, ) -> Self
Create a new FileScanConfigBuilder with default settings for scanning files.
§Parameters:
object_store_url: See FileScanConfig::object_store_url
file_schema: See FileScanConfig::file_schema
file_source: See FileScanConfig::file_source
Sourcepub fn with_limit(self, limit: Option<usize>) -> Self
pub fn with_limit(self, limit: Option<usize>) -> Self
Set the maximum number of records to read from this plan. If None, all records after filtering are returned.
Sourcepub fn with_source(self, file_source: Arc<dyn FileSource>) -> Self
pub fn with_source(self, file_source: Arc<dyn FileSource>) -> Self
Set the file source for scanning files.
This method allows you to change the file source implementation (e.g. ParquetSource, CsvSource, etc.) after the builder has been created.
Sourcepub fn with_projection(self, projection: Option<Vec<usize>>) -> Self
pub fn with_projection(self, projection: Option<Vec<usize>>) -> Self
Set the columns on which to project the data. Indexes that are higher than the number of columns of file_schema refer to table_partition_cols.
Sourcepub fn with_table_partition_cols(self, table_partition_cols: Vec<Field>) -> Self
pub fn with_table_partition_cols(self, table_partition_cols: Vec<Field>) -> Self
Set the partitioning columns
Sourcepub fn with_constraints(self, constraints: Constraints) -> Self
pub fn with_constraints(self, constraints: Constraints) -> Self
Set the table constraints
Sourcepub fn with_statistics(self, statistics: Statistics) -> Self
pub fn with_statistics(self, statistics: Statistics) -> Self
Set the estimated overall statistics of the files, taking filters into account.
Defaults to Statistics::new_unknown.
Sourcepub fn with_file_groups(self, file_groups: Vec<FileGroup>) -> Self
pub fn with_file_groups(self, file_groups: Vec<FileGroup>) -> Self
Set the list of files to be processed, grouped into partitions.
Each file must have a schema of file_schema or a subset. If a particular file has a subset, the missing columns are padded with NULLs.
DataFusion may attempt to read each partition of files concurrently, however files within a partition will be read sequentially, one after the next.
Sourcepub fn with_file_group(self, file_group: FileGroup) -> Self
pub fn with_file_group(self, file_group: FileGroup) -> Self
Add a new file group.
See Self::with_file_groups for more information.
Sourcepub fn with_file(self, file: PartitionedFile) -> Self
pub fn with_file(self, file: PartitionedFile) -> Self
Add a file as a single group.
See Self::with_file_groups for more information.
Sourcepub fn with_output_ordering(self, output_ordering: Vec<LexOrdering>) -> Self
pub fn with_output_ordering(self, output_ordering: Vec<LexOrdering>) -> Self
Set the output ordering of the files
Sourcepub fn with_file_compression_type(
self,
file_compression_type: FileCompressionType,
) -> Self
pub fn with_file_compression_type( self, file_compression_type: FileCompressionType, ) -> Self
Set the file compression type
Sourcepub fn with_newlines_in_values(self, new_lines_in_values: bool) -> Self
pub fn with_newlines_in_values(self, new_lines_in_values: bool) -> Self
Set whether new lines in values are supported for CSVOptions.
Parsing newlines in quoted values may be affected by execution behaviour such as parallel file scanning. Setting this to true ensures that newlines in values are parsed successfully, which may reduce performance.
Sourcepub fn with_batch_size(self, batch_size: Option<usize>) -> Self
pub fn with_batch_size(self, batch_size: Option<usize>) -> Self
Set the batch_size property
Sourcepub fn build(self) -> FileScanConfig
pub fn build(self) -> FileScanConfig
Build the final FileScanConfig with all the configured settings.
This method takes ownership of the builder and returns the constructed FileScanConfig.
Any unset optional fields will use their default values.
Trait Implementations§
Source§impl Clone for FileScanConfigBuilder
impl Clone for FileScanConfigBuilder
Source§fn clone(&self) -> FileScanConfigBuilder
fn clone(&self) -> FileScanConfigBuilder
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl From<FileScanConfig> for FileScanConfigBuilder
impl From<FileScanConfig> for FileScanConfigBuilder
Source§fn from(config: FileScanConfig) -> Self
fn from(config: FileScanConfig) -> Self
Auto Trait Implementations§
impl Freeze for FileScanConfigBuilder
impl !RefUnwindSafe for FileScanConfigBuilder
impl Send for FileScanConfigBuilder
impl Sync for FileScanConfigBuilder
impl Unpin for FileScanConfigBuilder
impl !UnwindSafe for FileScanConfigBuilder
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more