Skip to main content

alopex_dataframe/io/
options.rs

1use crate::Expr;
2
3/// Options for reading CSV files.
4#[derive(Debug, Clone)]
5pub struct CsvReadOptions {
6    /// Whether the CSV file has a header row.
7    pub has_header: bool,
8    /// Field delimiter byte (e.g. `b','`).
9    pub delimiter: u8,
10    /// Quote character byte (defaults to `Some(b'\"')`).
11    pub quote_char: Option<u8>,
12    /// Values that should be interpreted as null.
13    pub null_values: Vec<String>,
14    /// Maximum number of rows used for schema inference.
15    pub infer_schema_length: usize,
16    /// Optional column projection (names).
17    pub projection: Option<Vec<String>>,
18    /// Optional predicate expression to apply (may be pushed down in the future).
19    pub predicate: Option<Expr>,
20}
21
22impl Default for CsvReadOptions {
23    fn default() -> Self {
24        Self {
25            has_header: true,
26            delimiter: b',',
27            quote_char: Some(b'"'),
28            null_values: Vec::new(),
29            infer_schema_length: 100,
30            projection: None,
31            predicate: None,
32        }
33    }
34}
35
36impl CsvReadOptions {
37    /// Set `has_header`.
38    pub fn with_has_header(mut self, has_header: bool) -> Self {
39        self.has_header = has_header;
40        self
41    }
42
43    /// Set `delimiter`.
44    pub fn with_delimiter(mut self, delimiter: u8) -> Self {
45        self.delimiter = delimiter;
46        self
47    }
48
49    /// Set `quote_char`.
50    pub fn with_quote_char(mut self, quote_char: Option<u8>) -> Self {
51        self.quote_char = quote_char;
52        self
53    }
54
55    /// Set `null_values`.
56    pub fn with_null_values<I, S>(mut self, values: I) -> Self
57    where
58        I: IntoIterator<Item = S>,
59        S: Into<String>,
60    {
61        self.null_values = values.into_iter().map(Into::into).collect();
62        self
63    }
64
65    /// Set `infer_schema_length`.
66    pub fn with_infer_schema_length(mut self, infer_schema_length: usize) -> Self {
67        self.infer_schema_length = infer_schema_length;
68        self
69    }
70
71    /// Set a column projection by name.
72    pub fn with_projection<I, S>(mut self, columns: I) -> Self
73    where
74        I: IntoIterator<Item = S>,
75        S: Into<String>,
76    {
77        self.projection = Some(columns.into_iter().map(Into::into).collect());
78        self
79    }
80
81    /// Set a predicate to apply.
82    pub fn with_predicate(mut self, predicate: Expr) -> Self {
83        self.predicate = Some(predicate);
84        self
85    }
86}
87
88/// Options for reading Parquet files.
89#[derive(Debug, Clone)]
90pub struct ParquetReadOptions {
91    /// Optional column projection (names).
92    pub columns: Option<Vec<String>>,
93    /// Optional row group selection.
94    pub row_groups: Option<Vec<usize>>,
95    /// Record batch size for the Parquet reader.
96    pub batch_size: usize,
97    /// Optional predicate expression to apply (may be pushed down in the future).
98    pub predicate: Option<Expr>,
99}
100
101impl Default for ParquetReadOptions {
102    fn default() -> Self {
103        Self {
104            columns: None,
105            row_groups: None,
106            batch_size: 65_536,
107            predicate: None,
108        }
109    }
110}
111
112impl ParquetReadOptions {
113    /// Set a column projection by name.
114    pub fn with_columns<I, S>(mut self, columns: I) -> Self
115    where
116        I: IntoIterator<Item = S>,
117        S: Into<String>,
118    {
119        self.columns = Some(columns.into_iter().map(Into::into).collect());
120        self
121    }
122
123    /// Set row group indices to read.
124    pub fn with_row_groups<I>(mut self, row_groups: I) -> Self
125    where
126        I: IntoIterator<Item = usize>,
127    {
128        self.row_groups = Some(row_groups.into_iter().collect());
129        self
130    }
131
132    /// Set record batch size.
133    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
134        self.batch_size = batch_size;
135        self
136    }
137
138    /// Set a predicate to apply.
139    pub fn with_predicate(mut self, predicate: Expr) -> Self {
140        self.predicate = Some(predicate);
141        self
142    }
143}