Struct polars_lazy::frame::LazyCsvReader
source · pub struct LazyCsvReader<'a> { /* private fields */ }
Available on crate feature `csv` only.
Implementations§
source§impl<'a> LazyCsvReader<'a>
impl<'a> LazyCsvReader<'a>
pub fn new_paths(paths: Arc<[PathBuf]>) -> Self
pub fn new(path: impl AsRef<Path>) -> Self
sourcepub fn with_skip_rows_after_header(self, offset: usize) -> Self
pub fn with_skip_rows_after_header(self, offset: usize) -> Self
Skip this number of rows after the header location.
sourcepub fn with_row_count(self, row_count: Option<RowCount>) -> Self
pub fn with_row_count(self, row_count: Option<RowCount>) -> Self
Add a `row_count` column.
sourcepub fn with_n_rows(self, num_rows: Option<usize>) -> Self
pub fn with_n_rows(self, num_rows: Option<usize>) -> Self
Try to stop parsing when `n` rows are parsed. During multithreaded parsing the upper bound `n` cannot be guaranteed.
sourcepub fn with_infer_schema_length(self, num_rows: Option<usize>) -> Self
pub fn with_infer_schema_length(self, num_rows: Option<usize>) -> Self
Set the number of rows to use when inferring the CSV schema.
The default is 100 rows.
Setting to `None` will do a full table scan, which is very slow.
sourcepub fn with_ignore_errors(self, ignore: bool) -> Self
pub fn with_ignore_errors(self, ignore: bool) -> Self
Continue with next batch when a ParserError is encountered.
sourcepub fn with_schema(self, schema: Option<SchemaRef>) -> Self
pub fn with_schema(self, schema: Option<SchemaRef>) -> Self
Set the CSV file’s schema
sourcepub fn with_skip_rows(self, skip_rows: usize) -> Self
pub fn with_skip_rows(self, skip_rows: usize) -> Self
Skip the first `n` rows during parsing. The header will be parsed at row `n`.
sourcepub fn with_dtype_overwrite(self, schema: Option<&'a Schema>) -> Self
pub fn with_dtype_overwrite(self, schema: Option<&'a Schema>) -> Self
Overwrite the schema with the dtypes in this given Schema. The given schema may be a subset of the total schema.
sourcepub fn has_header(self, has_header: bool) -> Self
pub fn has_header(self, has_header: bool) -> Self
Set whether the CSV file has headers
sourcepub fn with_separator(self, separator: u8) -> Self
pub fn with_separator(self, separator: u8) -> Self
Set the CSV file’s column separator as a byte character
sourcepub fn with_comment_char(self, comment_char: Option<u8>) -> Self
pub fn with_comment_char(self, comment_char: Option<u8>) -> Self
Set the comment character. Lines starting with this character will be ignored.
sourcepub fn with_quote_char(self, quote: Option<u8>) -> Self
pub fn with_quote_char(self, quote: Option<u8>) -> Self
Set the `char` used as quote char. The default is `b'"'`. If set to `None`, quoting is disabled.
sourcepub fn with_end_of_line_char(self, eol_char: u8) -> Self
pub fn with_end_of_line_char(self, eol_char: u8) -> Self
Set the `char` used as end of line. The default is `b'\n'`.
sourcepub fn with_null_values(self, null_values: Option<NullValues>) -> Self
pub fn with_null_values(self, null_values: Option<NullValues>) -> Self
Set values that will be interpreted as missing/null.
sourcepub fn with_missing_is_null(self, missing_is_null: bool) -> Self
pub fn with_missing_is_null(self, missing_is_null: bool) -> Self
Treat missing fields as null.
sourcepub fn with_cache(self, cache: bool) -> Self
pub fn with_cache(self, cache: bool) -> Self
Cache the DataFrame after reading.
sourcepub fn low_memory(self, toggle: bool) -> Self
pub fn low_memory(self, toggle: bool) -> Self
Reduce memory usage at the expense of performance.
sourcepub fn with_encoding(self, enc: CsvEncoding) -> Self
pub fn with_encoding(self, enc: CsvEncoding) -> Self
Set CsvEncoding
sourcepub fn with_try_parse_dates(self, toggle: bool) -> Self
Available on crate feature `temporal` only.
pub fn with_try_parse_dates(self, toggle: bool) -> Self
Available on crate feature `temporal` only.
Automatically try to parse dates/datetimes and time.
If parsing fails, columns remain of dtype `DataType::Utf8`.
sourcepub fn raise_if_empty(self, toggle: bool) -> Self
pub fn raise_if_empty(self, toggle: bool) -> Self
Raise an error if CSV is empty (otherwise return an empty frame)
sourcepub fn truncate_ragged_lines(self, toggle: bool) -> Self
pub fn truncate_ragged_lines(self, toggle: bool) -> Self
Truncate lines that are longer than the schema.
sourcepub fn with_schema_modify<F>(self, f: F) -> PolarsResult<Self>where
F: Fn(Schema) -> PolarsResult<Schema>,
pub fn with_schema_modify<F>(self, f: F) -> PolarsResult<Self>where F: Fn(Schema) -> PolarsResult<Schema>,
Modify a schema before we run the lazy scanning.
Important! Run this function last in the builder!
Trait Implementations§
source§impl<'a> Clone for LazyCsvReader<'a>
impl<'a> Clone for LazyCsvReader<'a>
source§fn clone(&self) -> LazyCsvReader<'a>
fn clone(&self) -> LazyCsvReader<'a>
1.0.0 · source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more
source§impl LazyFileListReader for LazyCsvReader<'_>
impl LazyFileListReader for LazyCsvReader<'_>
source§fn with_rechunk(self, toggle: bool) -> Self
fn with_rechunk(self, toggle: bool) -> Self
Rechunk the memory to contiguous chunks when parsing is done.
source§fn n_rows(&self) -> Option<usize>
fn n_rows(&self) -> Option<usize>
Try to stop parsing when `n` rows are parsed. During multithreaded parsing the upper bound `n` cannot be guaranteed.