Struct arrow_csv::reader::ReaderBuilder
source · pub struct ReaderBuilder { /* private fields */ }
Expand description
CSV file reader builder
Implementations§
source§impl ReaderBuilder
impl ReaderBuilder
sourcepub fn new() -> ReaderBuilder
pub fn new() -> ReaderBuilder
Create a new builder for configuring CSV parsing options.
To convert a builder into a reader, call ReaderBuilder::build.
Example
use arrow_csv::{Reader, ReaderBuilder};
use std::fs::File;
fn example() -> Reader<File> {
let file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
// create a builder, inferring the schema with the first 100 records
let builder = ReaderBuilder::new().infer_schema(Some(100));
let reader = builder.build(file).unwrap();
reader
}
sourcepub fn with_schema(self, schema: SchemaRef) -> Self
pub fn with_schema(self, schema: SchemaRef) -> Self
Set the CSV file’s schema
sourcepub fn has_header(self, has_header: bool) -> Self
pub fn has_header(self, has_header: bool) -> Self
Set whether the CSV file has headers
sourcepub fn with_datetime_re(self, datetime_re: Regex) -> Self
pub fn with_datetime_re(self, datetime_re: Regex) -> Self
Set the datetime regex used to parse the string to Date64Type. This regex is used while inferring the schema.
sourcepub fn with_datetime_format(self, datetime_format: String) -> Self
pub fn with_datetime_format(self, datetime_format: String) -> Self
Set the datetime format used to parse the string to Date64Type. This format is used when the schema wants to parse Date64Type.
For the format syntax, refer to the chrono docs.
sourcepub fn with_delimiter(self, delimiter: u8) -> Self
pub fn with_delimiter(self, delimiter: u8) -> Self
Set the CSV file’s column delimiter as a byte character
pub fn with_escape(self, escape: u8) -> Self
pub fn with_quote(self, quote: u8) -> Self
pub fn with_terminator(self, terminator: u8) -> Self
sourcepub fn infer_schema(self, max_records: Option<usize>) -> Self
pub fn infer_schema(self, max_records: Option<usize>) -> Self
Set the CSV reader to infer the schema of the file
sourcepub fn with_batch_size(self, batch_size: usize) -> Self
pub fn with_batch_size(self, batch_size: usize) -> Self
Set the batch size (number of records to load at one time)
sourcepub fn with_bounds(self, start: usize, end: usize) -> Self
pub fn with_bounds(self, start: usize, end: usize) -> Self
Set the bounds over which to scan the reader. start and end are line numbers.
sourcepub fn with_projection(self, projection: Vec<usize>) -> Self
pub fn with_projection(self, projection: Vec<usize>) -> Self
Set the reader’s column projection
sourcepub fn build<R: Read + Seek>(self, reader: R) -> Result<Reader<R>, ArrowError>
pub fn build<R: Read + Seek>(self, reader: R) -> Result<Reader<R>, ArrowError>
Create a new Reader from a non-buffered reader.
If R: BufRead, consider using Self::build_buffered to avoid unnecessary additional buffering, as internally this method wraps reader in std::io::BufReader.
sourcepub fn build_buffered<R: BufRead + Seek>(
self,
reader: R
) -> Result<BufReader<R>, ArrowError>
pub fn build_buffered<R: BufRead + Seek>(
self,
reader: R
) -> Result<BufReader<R>, ArrowError>
Create a new BufReader from a buffered reader.