use std::sync::Arc;
use arrow::datatypes::{Field, SchemaRef};
use datafusion::{
datasource::{listing::PartitionedFile, physical_plan::FileScanConfig},
execution::object_store::ObjectStoreUrl,
physical_expr::LexOrdering,
physical_plan::Statistics,
};
/// Step-by-step builder for a [`FileScanConfig`].
///
/// Every field is moved unchanged into the `FileScanConfig` field of the same
/// name when the builder is consumed by its `build` method.
#[derive(Debug, Clone)]
#[must_use = "a builder has no effect until `build` is called"]
pub struct FileScanConfigBuilder {
    /// Object store URL handed to the resulting config.
    object_store_url: ObjectStoreUrl,
    /// Schema handed to the resulting config as `file_schema`.
    file_schema: SchemaRef,
    /// Files to scan, as a list of groups of [`PartitionedFile`]s.
    file_groups: Vec<Vec<PartitionedFile>>,
    /// Statistics handed to the resulting config.
    statistics: Statistics,
    /// Optional projection expressed as a list of `usize` indices.
    projection: Option<Vec<usize>>,
    /// Optional limit value.
    limit: Option<usize>,
    /// Output orderings handed to the resulting config.
    output_ordering: Vec<LexOrdering>,
    /// Table partition columns, described as Arrow [`Field`]s.
    table_partition_cols: Vec<Field>,
}
impl FileScanConfigBuilder {
    /// Creates a builder from the three mandatory inputs.
    ///
    /// The remaining settings start out as follows:
    /// - `statistics`: the result of `Statistics::new_unknown` for `file_schema`
    /// - `projection`: `None`
    /// - `limit`: `None`
    /// - `output_ordering`: empty
    /// - `table_partition_cols`: empty
    pub fn new(
        object_store_url: ObjectStoreUrl,
        file_schema: SchemaRef,
        file_groups: Vec<Vec<PartitionedFile>>,
    ) -> Self {
        // Borrow the schema through the existing `Arc` handle. Cloning the
        // `Arc` only to take a reference to the temporary would cost an atomic
        // increment/decrement for nothing.
        let statistics = Statistics::new_unknown(Arc::as_ref(&file_schema));
        Self {
            object_store_url,
            file_schema,
            file_groups,
            statistics,
            projection: None,
            limit: None,
            output_ordering: Vec::new(),
            table_partition_cols: Vec::new(),
        }
    }

    /// Replaces the projection with `projection`.
    ///
    /// Passing `None` clears any previously stored projection.
    pub fn projection_option(mut self, projection: Option<Vec<usize>>) -> Self {
        self.projection = projection;
        self
    }

    /// Replaces the limit with `limit`.
    ///
    /// Passing `None` clears any previously stored limit.
    pub fn limit_option(mut self, limit: Option<usize>) -> Self {
        self.limit = limit;
        self
    }

    /// Replaces the table partition columns with `table_partition_cols`.
    pub fn table_partition_cols(mut self, table_partition_cols: Vec<Field>) -> Self {
        self.table_partition_cols = table_partition_cols;
        self
    }

    /// Consumes the builder and assembles the [`FileScanConfig`].
    ///
    /// Each builder field is moved, unchanged, into the config field of the
    /// same name.
    #[must_use]
    pub fn build(self) -> FileScanConfig {
        FileScanConfig {
            object_store_url: self.object_store_url,
            file_schema: self.file_schema,
            file_groups: self.file_groups,
            statistics: self.statistics,
            projection: self.projection,
            limit: self.limit,
            output_ordering: self.output_ordering,
            table_partition_cols: self.table_partition_cols,
        }
    }
}