pub struct CsvSource { /* private fields */ }
Expand description
A Config for CsvOpener
§Example: create a DataSourceExec for CSV
let options = CsvOptions {
has_header: Some(true),
delimiter: b',',
quote: b'"',
newlines_in_values: Some(true), // The file contains newlines in values
..Default::default()
};
let source = Arc::new(CsvSource::new(file_schema.clone())
.with_csv_options(options)
.with_terminator(Some(b'#'))
);
// Create a DataSourceExec for reading the first 100MB of `file1.csv`
let config = FileScanConfigBuilder::new(object_store_url, source)
.with_file(PartitionedFile::new("file1.csv", 100*1024*1024))
.build();
let exec = (DataSourceExec::from_data_source(config));
Implementations§
Source§impl CsvSource
impl CsvSource
Sourcepub fn new(table_schema: impl Into<TableSchema>) -> Self
pub fn new(table_schema: impl Into<TableSchema>) -> Self
Returns a CsvSource
Sourcepub fn with_csv_options(self, options: CsvOptions) -> Self
pub fn with_csv_options(self, options: CsvOptions) -> Self
Sets the CSV options
Sourcepub fn has_header(&self) -> bool
pub fn has_header(&self) -> bool
true if the first line of each file is a header
pub fn truncate_rows(&self) -> bool
Sourcepub fn terminator(&self) -> Option<u8>
pub fn terminator(&self) -> Option<u8>
The line terminator
Sourcepub fn with_escape(&self, escape: Option<u8>) -> Self
pub fn with_escape(&self, escape: Option<u8>) -> Self
Initialize a CsvSource with escape
Sourcepub fn with_terminator(&self, terminator: Option<u8>) -> Self
pub fn with_terminator(&self, terminator: Option<u8>) -> Self
Initialize a CsvSource with terminator
Sourcepub fn with_comment(&self, comment: Option<u8>) -> Self
pub fn with_comment(&self, comment: Option<u8>) -> Self
Initialize a CsvSource with comment
Sourcepub fn with_truncate_rows(&self, truncate_rows: bool) -> Self
pub fn with_truncate_rows(&self, truncate_rows: bool) -> Self
Whether to support truncated rows when reading a CSV file
Sourcepub fn newlines_in_values(&self) -> bool
pub fn newlines_in_values(&self) -> bool
Whether values may contain newline characters
Trait Implementations§
Source§impl FileSource for CsvSource
impl FileSource for CsvSource
Source§fn create_file_opener(
&self,
object_store: Arc<dyn ObjectStore>,
base_config: &FileScanConfig,
partition_index: usize,
) -> Result<Arc<dyn FileOpener>>
fn create_file_opener( &self, object_store: Arc<dyn ObjectStore>, base_config: &FileScanConfig, partition_index: usize, ) -> Result<Arc<dyn FileOpener>>
Creates a dyn FileOpener based on given parameters
Source§fn table_schema(&self) -> &TableSchema
fn table_schema(&self) -> &TableSchema
Returns the table schema for this file source. Read more
Source§fn with_batch_size(&self, batch_size: usize) -> Arc<dyn FileSource>
fn with_batch_size(&self, batch_size: usize) -> Arc<dyn FileSource>
Initialize new type with batch size configuration
Source§fn try_pushdown_projection(
&self,
projection: &ProjectionExprs,
) -> Result<Option<Arc<dyn FileSource>>>
fn try_pushdown_projection( &self, projection: &ProjectionExprs, ) -> Result<Option<Arc<dyn FileSource>>>
Try to push down a projection into this FileSource. Read more
Source§fn projection(&self) -> Option<&ProjectionExprs>
fn projection(&self) -> Option<&ProjectionExprs>
Return the projection that will be applied to the output stream on top of the table schema.
Source§fn metrics(&self) -> &ExecutionPlanMetricsSet
fn metrics(&self) -> &ExecutionPlanMetricsSet
Return execution plan metrics
Source§fn file_type(&self) -> &str
fn file_type(&self) -> &str
String representation of file source such as “csv”, “json”, “parquet”
Source§fn supports_repartitioning(&self) -> bool
fn supports_repartitioning(&self) -> bool
Returns whether this file source supports repartitioning files by byte ranges. Read more
Source§fn fmt_extra(&self, t: DisplayFormatType, f: &mut Formatter<'_>) -> Result
fn fmt_extra(&self, t: DisplayFormatType, f: &mut Formatter<'_>) -> Result
Format FileType specific information
Source§fn filter(&self) -> Option<Arc<dyn PhysicalExpr>>
fn filter(&self) -> Option<Arc<dyn PhysicalExpr>>
Returns the filter expression that will be applied during the file scan.
Source§fn repartitioned(
&self,
target_partitions: usize,
repartition_file_min_size: usize,
output_ordering: Option<LexOrdering>,
config: &FileScanConfig,
) -> Result<Option<FileScanConfig>, DataFusionError>
fn repartitioned( &self, target_partitions: usize, repartition_file_min_size: usize, output_ordering: Option<LexOrdering>, config: &FileScanConfig, ) -> Result<Option<FileScanConfig>, DataFusionError>
If supported by the FileSource, redistribute files across partitions
according to their size. Allows custom file formats to implement their
own repartitioning logic. Read more
Source§fn try_pushdown_filters(
&self,
filters: Vec<Arc<dyn PhysicalExpr>>,
_config: &ConfigOptions,
) -> Result<FilterPushdownPropagation<Arc<dyn FileSource>>, DataFusionError>
fn try_pushdown_filters( &self, filters: Vec<Arc<dyn PhysicalExpr>>, _config: &ConfigOptions, ) -> Result<FilterPushdownPropagation<Arc<dyn FileSource>>, DataFusionError>
Try to push down filters into this FileSource.
See ExecutionPlan::handle_child_pushdown_result for more details.
Source§fn try_reverse_output(
&self,
_order: &[PhysicalSortExpr],
_eq_properties: &EquivalenceProperties,
) -> Result<SortOrderPushdownResult<Arc<dyn FileSource>>, DataFusionError>
fn try_reverse_output( &self, _order: &[PhysicalSortExpr], _eq_properties: &EquivalenceProperties, ) -> Result<SortOrderPushdownResult<Arc<dyn FileSource>>, DataFusionError>
Try to create a new FileSource that can produce data in the specified sort order. Read more
Source§fn with_schema_adapter_factory(
&self,
_factory: Arc<dyn SchemaAdapterFactory>,
) -> Result<Arc<dyn FileSource>, DataFusionError>
fn with_schema_adapter_factory( &self, _factory: Arc<dyn SchemaAdapterFactory>, ) -> Result<Arc<dyn FileSource>, DataFusionError>
👎Deprecated since 52.0.0: SchemaAdapterFactory has been removed. Use PhysicalExprAdapterFactory instead. See upgrading.md for more details.
Deprecated: Set optional schema adapter factory. Read more
Source§fn schema_adapter_factory(&self) -> Option<Arc<dyn SchemaAdapterFactory>>
fn schema_adapter_factory(&self) -> Option<Arc<dyn SchemaAdapterFactory>>
👎Deprecated since 52.0.0: SchemaAdapterFactory has been removed. Use PhysicalExprAdapterFactory instead. See upgrading.md for more details.
Deprecated: Returns the current schema adapter factory if set. Read more
Auto Trait Implementations§
impl Freeze for CsvSource
impl !RefUnwindSafe for CsvSource
impl Send for CsvSource
impl Sync for CsvSource
impl Unpin for CsvSource
impl !UnwindSafe for CsvSource
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for T
where
T: Clone,
impl<T> CloneToUninit for T
where
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more