pub struct CsvFormat { /* private fields */ }
Expand description
Character Separated Value FileFormat implementation.
Implementations§
Source§impl CsvFormat
impl CsvFormat
Sourcepub async fn read_to_delimited_chunks_from_stream<'a>(
&self,
stream: BoxStream<'a, Result<Bytes>>,
) -> BoxStream<'a, Result<Bytes>>
pub async fn read_to_delimited_chunks_from_stream<'a>( &self, stream: BoxStream<'a, Result<Bytes>>, ) -> BoxStream<'a, Result<Bytes>>
Convert a stream of bytes into a stream of Bytes containing newline-delimited CSV records, while accounting for the escape character (\) and the quote character (").
Sourcepub fn with_options(self, options: CsvOptions) -> Self
pub fn with_options(self, options: CsvOptions) -> Self
Set the csv options
Sourcepub fn options(&self) -> &CsvOptions
pub fn options(&self) -> &CsvOptions
Retrieve the csv options
Sourcepub fn with_schema_infer_max_rec(self, max_rec: usize) -> Self
pub fn with_schema_infer_max_rec(self, max_rec: usize) -> Self
Set a limit, in terms of records to scan, for inferring the schema
- defaults to DEFAULT_SCHEMA_INFER_MAX_RECORD
Sourcepub fn with_has_header(self, has_header: bool) -> Self
pub fn with_has_header(self, has_header: bool) -> Self
Set true to indicate that the first line is a header.
- default to true
Sourcepub fn with_null_regex(self, null_regex: Option<String>) -> Self
pub fn with_null_regex(self, null_regex: Option<String>) -> Self
Set the regex to use for null values in the CSV reader.
- default to treat empty values as null.
Sourcepub fn has_header(&self) -> Option<bool>
pub fn has_header(&self) -> Option<bool>
Returns Some(true) if the first line is a header, Some(false) if it is not, and None if it is not specified.
Sourcepub fn with_comment(self, comment: Option<u8>) -> Self
pub fn with_comment(self, comment: Option<u8>) -> Self
Lines beginning with this byte are ignored.
Sourcepub fn with_delimiter(self, delimiter: u8) -> Self
pub fn with_delimiter(self, delimiter: u8) -> Self
The character separating values within a row.
- default to ‘,’
Sourcepub fn with_quote(self, quote: u8) -> Self
pub fn with_quote(self, quote: u8) -> Self
The quote character in a row.
- default to ‘"’
Sourcepub fn with_escape(self, escape: Option<u8>) -> Self
pub fn with_escape(self, escape: Option<u8>) -> Self
The escape character in a row.
- default is None
Sourcepub fn with_terminator(self, terminator: Option<u8>) -> Self
pub fn with_terminator(self, terminator: Option<u8>) -> Self
The character used to indicate the end of a row.
- default to None (CRLF)
Sourcepub fn with_newlines_in_values(self, newlines_in_values: bool) -> Self
pub fn with_newlines_in_values(self, newlines_in_values: bool) -> Self
Specifies whether newlines in (quoted) values are supported.
Parsing newlines in quoted values may be affected by execution behaviour such as parallel file scanning. Setting this to true ensures that newlines in values are parsed successfully, which may reduce performance.
The default behaviour depends on the datafusion.catalog.newlines_in_values setting.
Sourcepub fn with_file_compression_type(
self,
file_compression_type: FileCompressionType,
) -> Self
pub fn with_file_compression_type( self, file_compression_type: FileCompressionType, ) -> Self
Set the FileCompressionType of the CSV data
- defaults to FileCompressionType::UNCOMPRESSED
Source§impl CsvFormat
impl CsvFormat
Sourcepub async fn infer_schema_from_stream(
&self,
state: &dyn Session,
records_to_read: usize,
stream: impl Stream<Item = Result<Bytes>>,
) -> Result<(Schema, usize)>
pub async fn infer_schema_from_stream( &self, state: &dyn Session, records_to_read: usize, stream: impl Stream<Item = Result<Bytes>>, ) -> Result<(Schema, usize)>
Infer the schema by reading up to records_to_read records from a stream of delimited chunks. Returns the inferred schema together with the number of lines that were read.
Trait Implementations§
Source§impl FileFormat for CsvFormat
impl FileFormat for CsvFormat
Source§fn as_any(&self) -> &dyn Any
fn as_any(&self) -> &dyn Any
Returns self as Any so that it can be downcast to a specific implementation.
Source§fn get_ext_with_compression(
&self,
file_compression_type: &FileCompressionType,
) -> Result<String>
fn get_ext_with_compression( &self, file_compression_type: &FileCompressionType, ) -> Result<String>
Source§fn infer_schema<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
state: &'life1 dyn Session,
store: &'life2 Arc<dyn ObjectStore>,
objects: &'life3 [ObjectMeta],
) -> Pin<Box<dyn Future<Output = Result<SchemaRef>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
fn infer_schema<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
state: &'life1 dyn Session,
store: &'life2 Arc<dyn ObjectStore>,
objects: &'life3 [ObjectMeta],
) -> Pin<Box<dyn Future<Output = Result<SchemaRef>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
Source§fn infer_stats<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
_state: &'life1 dyn Session,
_store: &'life2 Arc<dyn ObjectStore>,
table_schema: SchemaRef,
_object: &'life3 ObjectMeta,
) -> Pin<Box<dyn Future<Output = Result<Statistics>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
fn infer_stats<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
_state: &'life1 dyn Session,
_store: &'life2 Arc<dyn ObjectStore>,
table_schema: SchemaRef,
_object: &'life3 ObjectMeta,
) -> Pin<Box<dyn Future<Output = Result<Statistics>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
Source§fn create_physical_plan<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
state: &'life1 dyn Session,
conf: FileScanConfig,
_filters: Option<&'life2 Arc<dyn PhysicalExpr>>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
fn create_physical_plan<'life0, 'life1, 'life2, 'async_trait>(
&'life0 self,
state: &'life1 dyn Session,
conf: FileScanConfig,
_filters: Option<&'life2 Arc<dyn PhysicalExpr>>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
Source§fn create_writer_physical_plan<'life0, 'life1, 'async_trait>(
&'life0 self,
input: Arc<dyn ExecutionPlan>,
state: &'life1 dyn Session,
conf: FileSinkConfig,
order_requirements: Option<LexRequirement>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
fn create_writer_physical_plan<'life0, 'life1, 'async_trait>(
&'life0 self,
input: Arc<dyn ExecutionPlan>,
state: &'life1 dyn Session,
conf: FileSinkConfig,
order_requirements: Option<LexRequirement>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Source§fn file_source(&self) -> Arc<dyn FileSource>
fn file_source(&self) -> Arc<dyn FileSource>
Returns the associated file source, such as CsvSource, JsonSource, etc.
Source§fn supports_filters_pushdown(
&self,
_file_schema: &Schema,
_table_schema: &Schema,
_filters: &[&Expr],
) -> Result<FilePushdownSupport, DataFusionError>
fn supports_filters_pushdown( &self, _file_schema: &Schema, _table_schema: &Schema, _filters: &[&Expr], ) -> Result<FilePushdownSupport, DataFusionError>
Auto Trait Implementations§
impl Freeze for CsvFormat
impl RefUnwindSafe for CsvFormat
impl Send for CsvFormat
impl Sync for CsvFormat
impl Unpin for CsvFormat
impl UnwindSafe for CsvFormat
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more