use arrow::datatypes::SchemaRef;
use std::any::Any;
use std::string::String;
use std::sync::Arc;
use crate::datasource::datasource::Statistics;
use crate::datasource::TableProvider;
use crate::error::{DataFusionError, Result};
use crate::logical_plan::Expr;
use crate::physical_plan::csv::CsvExec;
pub use crate::physical_plan::csv::CsvReadOptions;
use crate::physical_plan::{common, ExecutionPlan};
/// Table provider backed by CSV data found at a filesystem path.
///
/// Instances are created through `CsvFile::try_new`, which records the read
/// options alongside the schema so that later scans reuse the same settings.
pub struct CsvFile {
    /// Location that was handed to `try_new`.
    path: String,
    /// Schema of the table: either supplied by the caller or inferred from
    /// the files discovered at `path`.
    schema: SchemaRef,
    /// Header flag copied from the read options.
    has_header: bool,
    /// Column delimiter byte copied from the read options.
    delimiter: u8,
    /// File extension used when listing the files under `path`.
    file_extension: String,
    /// Table statistics; `try_new` fills this with `Statistics::default()`.
    statistics: Statistics,
}
impl CsvFile {
    /// Builds a `CsvFile` for `path` using the supplied read options.
    ///
    /// When `options.schema` is set it is cloned and used as-is. Otherwise the
    /// files at `path` carrying `options.file_extension` are listed and the
    /// schema is inferred from them.
    ///
    /// # Errors
    ///
    /// * Propagates any error from listing the files or inferring the schema.
    /// * Returns `DataFusionError::Plan` when no schema was supplied and the
    ///   file listing comes back empty.
    pub fn try_new(path: &str, options: CsvReadOptions) -> Result<Self> {
        let resolved_schema = if let Some(provided) = options.schema {
            provided.clone()
        } else {
            // No explicit schema: discover the candidate files first, then
            // let the CSV execution plan infer the schema from them.
            let mut discovered: Vec<String> = Vec::new();
            common::build_file_list(path, &mut discovered, options.file_extension)?;
            if discovered.is_empty() {
                return Err(DataFusionError::Plan(format!(
                    "No files found at {path} with file extension {file_extension}",
                    path = path,
                    file_extension = options.file_extension
                )));
            }
            CsvExec::try_infer_schema(&discovered, &options)?
        };

        Ok(Self {
            path: path.to_owned(),
            schema: Arc::new(resolved_schema),
            has_header: options.has_header,
            delimiter: options.delimiter,
            file_extension: options.file_extension.to_owned(),
            statistics: Statistics::default(),
        })
    }

    /// Returns the path this table was created from.
    pub fn path(&self) -> &str {
        self.path.as_str()
    }

    /// Returns the header flag recorded from the read options.
    pub fn has_header(&self) -> bool {
        self.has_header
    }

    /// Returns the delimiter byte recorded from the read options.
    pub fn delimiter(&self) -> u8 {
        self.delimiter
    }

    /// Returns the file extension recorded from the read options.
    pub fn file_extension(&self) -> &str {
        self.file_extension.as_str()
    }
}
impl TableProvider for CsvFile {
    /// Exposes this provider as `&dyn Any` so callers can downcast it.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Returns a new handle to the table schema.
    fn schema(&self) -> SchemaRef {
        Arc::clone(&self.schema)
    }

    /// Creates a `CsvExec` plan over this table's path.
    ///
    /// The read options are rebuilt from the values stored on `self`. The
    /// batch size handed to the plan is `batch_size`, capped at `limit` when a
    /// limit is given; `limit` itself is also forwarded unchanged.
    /// `_filters` is not used.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `CsvExec::try_new`.
    fn scan(
        &self,
        projection: &Option<Vec<usize>>,
        batch_size: usize,
        _filters: &[Expr],
        limit: Option<usize>,
    ) -> Result<Arc<dyn ExecutionPlan>> {
        let read_options = CsvReadOptions::new()
            .schema(&self.schema)
            .has_header(self.has_header)
            .delimiter(self.delimiter)
            .file_extension(self.file_extension.as_str());

        // Never ask for batches larger than the number of rows requested.
        let effective_batch_size = match limit {
            Some(max_rows) => batch_size.min(max_rows),
            None => batch_size,
        };

        let plan = CsvExec::try_new(
            &self.path,
            read_options,
            projection.clone(),
            effective_batch_size,
            limit,
        )?;
        Ok(Arc::new(plan))
    }

    /// Returns a copy of the statistics stored on this table.
    fn statistics(&self) -> Statistics {
        self.statistics.clone()
    }
}