pub struct CsvOptions {
pub schema: Option<String>,
pub sep: Option<String>,
pub delimiter: Option<String>,
pub encoding: Option<String>,
pub quote: Option<String>,
pub quote_all: Option<bool>,
pub escape: Option<String>,
pub escape_quotes: Option<bool>,
pub comment: Option<String>,
pub header: Option<bool>,
pub infer_schema: Option<bool>,
pub ignore_leading_white_space: Option<bool>,
pub ignore_trailing_white_space: Option<bool>,
pub null_value: Option<String>,
pub nan_value: Option<String>,
pub positive_inf: Option<String>,
pub negative_inf: Option<String>,
pub date_format: Option<String>,
pub timestamp_format: Option<String>,
pub timestamp_ntz_format: Option<String>,
pub enable_datetime_parsing_fallback: Option<bool>,
pub max_columns: Option<i32>,
pub max_chars_per_column: Option<i32>,
pub max_malformed_log_per_partition: Option<i32>,
pub mode: Option<String>,
pub column_name_of_corrupt_record: Option<String>,
pub multi_line: Option<bool>,
pub char_to_escape_quote_escaping: Option<String>,
pub sampling_ratio: Option<f64>,
pub prefer_date: Option<bool>,
pub enforce_schema: Option<bool>,
pub empty_value: Option<String>,
pub locale: Option<String>,
pub line_sep: Option<String>,
pub unescaped_quote_handling: Option<String>,
pub common: CommonFileOptions,
}
Expand description
A struct that represents options for configuring CSV file parsing.
`CsvOptions` provides various settings to customize the reading of CSV files.
It allows users to define the format, schema inference, handling of null values,
and many other parsing behaviors. These options are used by the Spark DataFrame reader
to correctly interpret and load CSV files into a DataFrame.
§Fields
- `path`: Specifies the file path or directory path to the CSV file(s). (NOTE: `path` does not appear among the struct's declared fields — confirm where it is supplied.)
- `schema`: Defines the schema for the CSV data. If not provided, the schema will be inferred based on the data.
- `sep`: Character used to separate fields in the CSV file. Default is a comma (`,`).
- `delimiter`: Alternative character used to separate fields in the CSV file.
- `encoding`: Character encoding used for the CSV file. Default is `UTF-8`.
- `quote`: Character used for quoting strings. Default is a double quote (`"`).
- `quote_all`: Whether to quote all fields or only those containing special characters.
- `escape`: Character used to escape quotes inside quoted strings. Default is a backslash (`\`).
- `comment`: Character that denotes the start of a comment line in the file.
- `header`: Whether the first line of the CSV file is a header that contains column names. Default is `false`.
- `infer_schema`: Whether to infer the schema from the CSV data. Default is `false`.
- `ignore_leading_white_space`: Whether to ignore leading white space in fields. Default is `false`.
- `ignore_trailing_white_space`: Whether to ignore trailing white space in fields. Default is `false`.
- `null_value`: String representation of a null value in the CSV file.
- `nan_value`: String representation of a NaN value in the CSV file.
- `positive_inf`: String representation of positive infinity in the CSV file.
- `negative_inf`: String representation of negative infinity in the CSV file.
- `date_format`: Format for parsing date fields in the CSV file.
- `timestamp_format`: Format for parsing timestamp fields in the CSV file.
- `timestamp_ntz_format`: Format for parsing timestamp fields without timezone information.
- `enable_datetime_parsing_fallback`: Whether to enable fallback parsing for date and time formats.
- `max_columns`: Maximum number of columns allowed in the CSV file.
- `max_chars_per_column`: Maximum number of characters allowed per column.
- `max_malformed_log_per_partition`: Maximum number of malformed rows logged per partition.
- `mode`: Handling mode for corrupt/malformed records. Options are “PERMISSIVE”, “DROPMALFORMED”, and “FAILFAST”.
- `column_name_of_corrupt_record`: Name of the column to store malformed records.
- `multi_line`: Whether to treat a row as spanning multiple lines. Default is `false`.
- `char_to_escape_quote_escaping`: Sets a character for escaping quotes inside a quoted field.
- `sampling_ratio`: Fraction of rows used for schema inference.
- `enforce_schema`: Whether to force schema on the CSV file.
- `empty_value`: Representation of an empty value in the CSV file.
- `locale`: Locale of the CSV file, used for number formatting.
- `line_sep`: Line separator character in the CSV file.
- `unescaped_quote_handling`: How to handle unescaped quotes in quoted fields. Options include “STOP_AT_CLOSING_QUOTE” and “BACK_TO_DELIMITER”.
- `common`: Common file options that are shared across multiple file formats.
Fields§
§schema: Option<String>
§sep: Option<String>
§delimiter: Option<String>
§encoding: Option<String>
§quote: Option<String>
§quote_all: Option<bool>
§escape: Option<String>
§escape_quotes: Option<bool>
§comment: Option<String>
§header: Option<bool>
§infer_schema: Option<bool>
§ignore_leading_white_space: Option<bool>
§ignore_trailing_white_space: Option<bool>
§null_value: Option<String>
§nan_value: Option<String>
§positive_inf: Option<String>
§negative_inf: Option<String>
§date_format: Option<String>
§timestamp_format: Option<String>
§timestamp_ntz_format: Option<String>
§enable_datetime_parsing_fallback: Option<bool>
§max_columns: Option<i32>
§max_chars_per_column: Option<i32>
§max_malformed_log_per_partition: Option<i32>
§mode: Option<String>
§column_name_of_corrupt_record: Option<String>
§multi_line: Option<bool>
§char_to_escape_quote_escaping: Option<String>
§sampling_ratio: Option<f64>
§prefer_date: Option<bool>
§enforce_schema: Option<bool>
§empty_value: Option<String>
§locale: Option<String>
§line_sep: Option<String>
§unescaped_quote_handling: Option<String>
§common: CommonFileOptions
Implementations§
Source§impl CsvOptions
impl CsvOptions
pub fn schema(self, value: &str) -> Self
pub fn sep(self, value: &str) -> Self
pub fn delimiter(self, value: &str) -> Self
pub fn encoding(self, value: &str) -> Self
pub fn quote(self, value: &str) -> Self
pub fn quote_all(self, value: bool) -> Self
pub fn escape(self, value: &str) -> Self
pub fn comment(self, value: &str) -> Self
pub fn header(self, value: bool) -> Self
pub fn infer_schema(self, value: bool) -> Self
pub fn ignore_leading_white_space(self, value: bool) -> Self
pub fn ignore_trailing_white_space(self, value: bool) -> Self
pub fn null_value(self, value: &str) -> Self
pub fn nan_value(self, value: &str) -> Self
pub fn positive_inf(self, value: &str) -> Self
pub fn negative_inf(self, value: &str) -> Self
pub fn date_format(self, value: &str) -> Self
pub fn timestamp_format(self, value: &str) -> Self
pub fn timestamp_ntz_format(self, value: &str) -> Self
pub fn enable_datetime_parsing_fallback(self, value: bool) -> Self
pub fn max_columns(self, value: i32) -> Self
pub fn max_chars_per_column(self, value: i32) -> Self
pub fn max_malformed_log_per_partition(self, value: i32) -> Self
pub fn mode(self, value: &str) -> Self
pub fn column_name_of_corrupt_record(self, value: &str) -> Self
pub fn multi_line(self, value: bool) -> Self
pub fn char_to_escape_quote_escaping(self, value: &str) -> Self
pub fn sampling_ratio(self, value: f64) -> Self
pub fn prefer_date(self, value: bool) -> Self
pub fn enforce_schema(self, value: bool) -> Self
pub fn empty_value(self, value: &str) -> Self
pub fn locale(self, value: &str) -> Self
pub fn line_sep(self, value: &str) -> Self
pub fn unescaped_quote_handling(self, value: &str) -> Self
pub fn escape_quotes(self, value: bool) -> Self
Trait Implementations§
Source§impl Clone for CsvOptions
impl Clone for CsvOptions
Source§fn clone(&self) -> CsvOptions
fn clone(&self) -> CsvOptions
Returns a copy of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more
Source§impl ConfigOpts for CsvOptions
impl ConfigOpts for CsvOptions
Source§impl Debug for CsvOptions
impl Debug for CsvOptions
Source§impl Default for CsvOptions
impl Default for CsvOptions
Source§fn default() -> CsvOptions
fn default() -> CsvOptions
Returns the “default value” for a type. Read more
Auto Trait Implementations§
impl Freeze for CsvOptions
impl RefUnwindSafe for CsvOptions
impl Send for CsvOptions
impl Sync for CsvOptions
impl Unpin for CsvOptions
impl UnwindSafe for CsvOptions
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoRequest<T> for T
impl<T> IntoRequest<T> for T
Source§fn into_request(self) -> Request<T>
fn into_request(self) -> Request<T>
Wrap the input message `T` in a `tonic::Request`