Struct polars_io::csv::CsvReader

source · [−]

pub struct CsvReader<'a, R> where
    R: MmapBytesReader,  { /* private fields */ }

Available on crate feature csv-file only.

Expand description

Create a new DataFrame by reading a csv file.

Example

use polars_core::prelude::*;
use polars_io::prelude::*;
use std::fs::File;

fn example() -> Result<DataFrame> {
    CsvReader::from_path("iris_csv")?
            .has_header(true)
            .finish()
}

Implementations

source

impl<'a, R> CsvReader<'a, R> where
R: 'a + MmapBytesReader,

source

pub fn with_skip_rows_after_header(self, offset: usize) -> Self

Skip these rows after the header

source

pub fn with_row_count(self, rc: Option<RowCount>) -> Self

Add a row_count column.

source

pub fn with_chunk_size(self, chunk_size: usize) -> Self

Sets the chunk size used by the parser. This influences performance.

source

pub fn with_encoding(self, enc: CsvEncoding) -> Self

Set CsvEncoding

source

pub fn with_n_rows(self, num_rows: Option<usize>) -> Self

Try to stop parsing when n rows are parsed. During multithreaded parsing the upper bound n cannot be guaranteed.

source

pub fn with_ignore_parser_errors(self, ignore: bool) -> Self

Continue with next batch when a ParserError is encountered.

source

pub fn with_schema(self, schema: &'a Schema) -> Self

Set the CSV file’s schema. This only accepts datatypes that are implemented in the csv parser and expects a complete Schema.

It is recommended to use with_dtypes instead.

source

pub fn with_skip_rows(self, skip_rows: usize) -> Self

Skip the first n rows during parsing. The header will be parsed at row n.

source

pub fn with_rechunk(self, rechunk: bool) -> Self

Rechunk the DataFrame to contiguous memory after the CSV is parsed.

source

pub fn has_header(self, has_header: bool) -> Self

Set whether the CSV file has headers

source

pub fn with_delimiter(self, delimiter: u8) -> Self

Set the CSV file’s column delimiter as a byte character

source

pub fn with_comment_char(self, comment_char: Option<u8>) -> Self

Set the comment character. Lines starting with this character will be ignored.

source

pub fn with_null_values(self, null_values: Option<NullValues>) -> Self

Set values that will be interpreted as missing/ null. Note that any value you set as null value will not be escaped, so if quotation marks are part of the null value you should include them.

source

pub fn with_dtypes(self, schema: Option<&'a Schema>) -> Self

Overwrite the schema with the dtypes in this given Schema. The given schema may be a subset of the total schema.

source

pub fn with_dtypes_slice(self, dtypes: Option<&'a [DataType]>) -> Self

Overwrite the dtypes in the schema in the order of the slice that’s given. This is useful if you don’t know the column names beforehand

source

pub fn infer_schema(self, max_records: Option<usize>) -> Self

Set the CSV reader to infer the schema of the file

Arguments

max_records - Maximum number of rows read for schema inference. Setting this to None will do a full table scan (slow).

source

pub fn with_projection(self, projection: Option<Vec<usize>>) -> Self

Set the reader’s column projection. This counts from 0, meaning that vec![0, 4] would select the 1st and 5th column.

source

pub fn with_columns(self, columns: Option<Vec<String>>) -> Self

Columns to select/ project

source

pub fn with_n_threads(self, n: Option<usize>) -> Self

Set the number of threads used in CSV reading. The default uses the number of cores of your cpu.

Note that this only works if this is initialized with CsvReader::from_path. Note that the number of cores is the maximum allowed number of threads.

source

pub fn with_path<P: Into<PathBuf>>(self, path: Option<P>) -> Self

The preferred way to initialize this builder. This allows the CSV file to be memory mapped and thereby greatly increases parsing performance.

source

pub fn sample_size(self, size: usize) -> Self

Sets the size of the sample taken from the CSV file. The sample is used to gather statistics about the file. These statistics are used to try to optimally allocate up front. Increasing this may improve performance.

source

pub fn low_memory(self, toggle: bool) -> Self

Reduce memory consumption at the expense of performance

source

pub fn with_quote_char(self, quote: Option<u8>) -> Self

Set the char used as quote char. The default is b'"'. If set to None, quoting is disabled.

source

pub fn with_parse_dates(self, toggle: bool) -> Self

Automatically try to parse dates, datetimes and times. If parsing fails, columns remain of dtype DataType::Utf8.

source

pub fn from_path<P: Into<PathBuf>>(path: P) -> Result<Self>

This is the recommended way to create a csv reader as this allows for fastest parsing.

Trait Implementations

source

impl<'a, R> SerReader<R> for CsvReader<'a, R> where
R: MmapBytesReader,

source

fn new(reader: R) -> Self

Create a new CsvReader from a file/ stream

source

fn finish(self) -> Result<DataFrame>

Read the file and create the DataFrame.

source

fn set_rechunk(self, _rechunk: bool) -> Self where
Self: Sized,

Rechunk to a single chunk after reading the file.

Auto Trait Implementations

impl<'a, R> !RefUnwindSafe for CsvReader<'a, R>

impl<'a, R> Send for CsvReader<'a, R>

impl<'a, R> Sync for CsvReader<'a, R>

impl<'a, R> Unpin for CsvReader<'a, R> where
R: Unpin,

impl<'a, R> !UnwindSafe for CsvReader<'a, R>

Blanket Implementations

source

impl<T> Any for T where
T: 'static + ?Sized,

source

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

source

impl<T> Borrow<T> for T where
T: ?Sized,

const: unstable · source

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

source

impl<T> BorrowMut<T> for T where
T: ?Sized,

const: unstable · source

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

source

impl<T> From<T> for T

const: unstable · source

fn from(t: T) -> T

Returns the argument unchanged.

source

impl<T, U> Into<U> for T where
U: From<T>,

const: unstable · source

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

impl<T> Pointable for T

const ALIGN: usize = mem::align_of::<T>()

The alignment of pointer.

type Init = T

The type for initializers.

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more

source

impl<T, U> TryFrom<U> for T where
U: Into<T>,

type Error = Infallible

The type returned in the event of a conversion error.

const: unstable · source

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

source

impl<T, U> TryInto<U> for T where
U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

const: unstable · source

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Struct polars_io::csv::CsvReader

Implementations

impl<'a, R> CsvReader<'a, R> where R: 'a + MmapBytesReader,

pub fn with_skip_rows_after_header(self, offset: usize) -> Self

pub fn with_row_count(self, rc: Option<RowCount>) -> Self

pub fn with_chunk_size(self, chunk_size: usize) -> Self

pub fn with_encoding(self, enc: CsvEncoding) -> Self

pub fn with_n_rows(self, num_rows: Option<usize>) -> Self

pub fn with_ignore_parser_errors(self, ignore: bool) -> Self

pub fn with_schema(self, schema: &'a Schema) -> Self

pub fn with_skip_rows(self, skip_rows: usize) -> Self

pub fn with_rechunk(self, rechunk: bool) -> Self

pub fn has_header(self, has_header: bool) -> Self

pub fn with_delimiter(self, delimiter: u8) -> Self

pub fn with_comment_char(self, comment_char: Option<u8>) -> Self

pub fn with_null_values(self, null_values: Option<NullValues>) -> Self

pub fn with_dtypes(self, schema: Option<&'a Schema>) -> Self

pub fn with_dtypes_slice(self, dtypes: Option<&'a [DataType]>) -> Self

pub fn infer_schema(self, max_records: Option<usize>) -> Self

pub fn with_projection(self, projection: Option<Vec<usize>>) -> Self

pub fn with_columns(self, columns: Option<Vec<String>>) -> Self

pub fn with_n_threads(self, n: Option<usize>) -> Self

pub fn with_path<P: Into<PathBuf>>(self, path: Option<P>) -> Self

pub fn sample_size(self, size: usize) -> Self

pub fn low_memory(self, toggle: bool) -> Self

pub fn with_quote_char(self, quote: Option<u8>) -> Self

pub fn with_parse_dates(self, toggle: bool) -> Self

pub fn with_predicate(self, predicate: Option<Arc<dyn PhysicalIoExpr>>) -> Self

pub fn with_aggregate(self, aggregate: Option<&'a [ScanAggregation]>) -> Self

impl<'a> CsvReader<'a, File>

pub fn from_path<P: Into<PathBuf>>(path: P) -> Result<Self>

Trait Implementations

impl<'a, R> SerReader<R> for CsvReader<'a, R> where R: MmapBytesReader,

fn new(reader: R) -> Self

fn finish(self) -> Result<DataFrame>

fn set_rechunk(self, _rechunk: bool) -> Self where Self: Sized,

Auto Trait Implementations

impl<'a, R> !RefUnwindSafe for CsvReader<'a, R>

impl<'a, R> Send for CsvReader<'a, R>

impl<'a, R> Sync for CsvReader<'a, R>

impl<'a, R> Unpin for CsvReader<'a, R> where R: Unpin,

impl<'a, R> !UnwindSafe for CsvReader<'a, R>

Blanket Implementations

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> Pointable for T

const ALIGN: usize = mem::align_of::<T>()

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<'a, R> CsvReader<'a, R> where
R: 'a + MmapBytesReader,

pub fn with_dtypes_slice(self, dtypes: Option<&'a [DataType]>) -> Self

pub fn with_aggregate(self, aggregate: Option<&'a [ScanAggregation]>) -> Self

impl<'a, R> SerReader<R> for CsvReader<'a, R> where
R: MmapBytesReader,

fn set_rechunk(self, _rechunk: bool) -> Self where
Self: Sized,

impl<'a, R> Unpin for CsvReader<'a, R> where
R: Unpin,

impl<T> Any for T where
T: 'static + ?Sized,

impl<T> Borrow<T> for T where
T: ?Sized,

impl<T> BorrowMut<T> for T where
T: ?Sized,

impl<T, U> Into<U> for T where
U: From<T>,

impl<T, U> TryFrom<U> for T where
U: Into<T>,

impl<T, U> TryInto<U> for T where
U: TryFrom<T>,