Struct polars::prelude::CsvReader[−][src]

pub struct CsvReader<'a, R> where
    R: MmapBytesReader,  { /* fields omitted */ }

Expand description

Create a new DataFrame by reading a csv file.

Example

use polars_core::prelude::*;
use polars_io::prelude::*;
use std::fs::File;

fn example() -> Result<DataFrame> {
    CsvReader::from_path("iris_csv")?
            .infer_schema(None)
            .has_header(true)
            .finish()
}

Implementations

[src]

impl<'a, R> CsvReader<'a, R> where
R: 'a + MmapBytesReader,

[src]

pub fn with_chunk_size(self, chunk_size: usize) -> CsvReader<'a, R>

Sets the chunk size used by the parser. This influences performance.

[src]

pub fn with_encoding(self, enc: CsvEncoding) -> CsvReader<'a, R>

Sets the CsvEncoding

[src]

pub fn with_stop_after_n_rows(self, num_rows: Option<usize>) -> CsvReader<'a, R>

Try to stop parsing when n rows are parsed. During multithreaded parsing the upper bound n cannot be guaranteed.

[src]

pub fn with_ignore_parser_errors(self, ignore: bool) -> CsvReader<'a, R>

Continue with next batch when a ParserError is encountered.

[src]

pub fn with_schema(self, schema: &'a Schema) -> CsvReader<'a, R>

Set the CSV file’s schema. This only accepts datatypes that are implemented in the csv parser and expects a complete Schema.

It is recommended to use with_dtypes instead.

[src]

pub fn with_skip_rows(self, skip_rows: usize) -> CsvReader<'a, R>

Skip the first n rows during parsing.

[src]

pub fn with_rechunk(self, rechunk: bool) -> CsvReader<'a, R>

Rechunk the DataFrame to contiguous memory after the CSV is parsed.

[src]

pub fn has_header(self, has_header: bool) -> CsvReader<'a, R>

Set whether the CSV file has headers.

[src]

pub fn with_delimiter(self, delimiter: u8) -> CsvReader<'a, R>

Set the CSV file’s column delimiter as a byte character.

[src]

pub fn with_comment_char(self, comment_char: Option<u8>) -> CsvReader<'a, R>

Set the comment character. Lines starting with this character will be ignored.

[src]

pub fn with_null_values(
self,
null_values: Option<NullValues>
) -> CsvReader<'a, R>

Set values that will be interpreted as missing/null.

[src]

pub fn with_dtypes(self, schema: Option<&'a Schema>) -> CsvReader<'a, R>

Overwrite the schema with the dtypes in this given Schema. The given schema may be a subset of the total schema.

[src]

pub fn with_dtypes_slice(
self,
dtypes: Option<&'a [DataType]>
) -> CsvReader<'a, R>

Overwrite the dtypes in the schema in the order of the slice that’s given. This is useful if you don’t know the column names beforehand.

[src]

pub fn infer_schema(self, max_records: Option<usize>) -> CsvReader<'a, R>

Set the CSV reader to infer the schema of the file.

Arguments

max_records - Maximum number of rows read for schema inference. Setting this to None will do a full table scan (slow).

[src]

pub fn with_projection(
self,
projection: Option<Vec<usize, Global>>
) -> CsvReader<'a, R>

Set the reader’s column projection. This counts from 0, meaning that vec![0, 4] would select the 1st and 5th column.

[src]

pub fn with_columns(
self,
columns: Option<Vec<String, Global>>
) -> CsvReader<'a, R>

Columns to select/project.

[src]

pub fn with_n_threads(self, n: Option<usize>) -> CsvReader<'a, R>

Set the number of threads used in CSV reading. The default uses the number of cores of your cpu.

Note that this only works if this is initialized with CsvReader::from_path. Note that the number of cores is the maximum allowed number of threads.

[src]

pub fn with_path<P>(self, path: Option<P>) -> CsvReader<'a, R> where
P: Into<PathBuf>,

The preferred way to initialize this builder. This allows the CSV file to be memory mapped and thereby greatly increases parsing performance.

[src]

pub fn sample_size(self, size: usize) -> CsvReader<'a, R>

Sets the size of the sample taken from the CSV file. The sample is used to get statistics about the file. These statistics are used to try to optimally allocate up front. Increasing this may improve performance.

[src]