1use std::path::{Path, PathBuf};
2
3use polars::frame::DataFrame;
4use polars::prelude::*;
5
6use super::PlotlarsError;
7
/// Builder that collects CSV reading options and loads the file into a
/// polars `DataFrame` when `finish` is called.
///
/// Every option is stored as an `Option`; `None` means the option was never
/// set by the caller.
#[derive(Debug, Clone)]
pub struct CsvReader {
    /// Location of the CSV file to read.
    path: PathBuf,
    /// Field separator byte; only forwarded to polars when set.
    delimiter: Option<u8>,
    /// Header flag; `finish` falls back to `true` when this is unset.
    has_header: Option<bool>,
    /// Row count forwarded to polars' `with_skip_rows`; only forwarded when set.
    skip_rows: Option<usize>,
    /// Strings registered as null markers for all columns; only forwarded when set.
    null_values: Option<Vec<String>>,
    /// Flag forwarded to polars' `with_try_parse_dates`; only forwarded when set.
    try_parse_dates: Option<bool>,
}
34
35impl CsvReader {
36 pub fn new(path: impl AsRef<Path>) -> Self {
38 Self {
39 path: path.as_ref().to_path_buf(),
40 delimiter: None,
41 has_header: None,
42 skip_rows: None,
43 null_values: None,
44 try_parse_dates: None,
45 }
46 }
47
48 pub fn delimiter(mut self, delimiter: u8) -> Self {
50 self.delimiter = Some(delimiter);
51 self
52 }
53
54 pub fn has_header(mut self, has_header: bool) -> Self {
56 self.has_header = Some(has_header);
57 self
58 }
59
60 pub fn skip_rows(mut self, skip_rows: usize) -> Self {
62 self.skip_rows = Some(skip_rows);
63 self
64 }
65
66 pub fn null_values(mut self, null_values: Vec<&str>) -> Self {
68 self.null_values = Some(null_values.into_iter().map(|s| s.to_string()).collect());
69 self
70 }
71
72 pub fn try_parse_dates(mut self, try_parse_dates: bool) -> Self {
74 self.try_parse_dates = Some(try_parse_dates);
75 self
76 }
77
78 pub fn finish(self) -> Result<DataFrame, PlotlarsError> {
85 let path_str = self.path.display().to_string();
86
87 let mut options =
88 CsvReadOptions::default().with_has_header(self.has_header.unwrap_or(true));
89
90 if let Some(skip) = self.skip_rows {
91 options = options.with_skip_rows(skip);
92 }
93
94 let mut parse_options = CsvParseOptions::default();
95
96 if let Some(delim) = self.delimiter {
97 parse_options = parse_options.with_separator(delim);
98 }
99
100 if let Some(nulls) = self.null_values {
101 let nulls: Vec<PlSmallStr> = nulls.into_iter().map(PlSmallStr::from).collect();
102 parse_options = parse_options.with_null_values(Some(NullValues::AllColumns(nulls)));
103 }
104
105 if let Some(try_dates) = self.try_parse_dates {
106 parse_options = parse_options.with_try_parse_dates(try_dates);
107 }
108
109 options = options.with_parse_options(parse_options);
110
111 options
112 .try_into_reader_with_file_path(Some(self.path))
113 .map_err(|e| PlotlarsError::CsvParse {
114 path: path_str.clone(),
115 source: Box::new(e),
116 })?
117 .finish()
118 .map_err(|e| PlotlarsError::CsvParse {
119 path: path_str,
120 source: Box::new(e),
121 })
122 }
123}
124
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the path of the fixture `name` inside the `data` directory that
    /// sits two levels above this crate's manifest directory.
    fn data_path(name: &str) -> String {
        let manifest_dir = env!("CARGO_MANIFEST_DIR");
        format!("{}/../../data/{}", manifest_dir, name)
    }

    // Reading with no options set yields a non-empty frame.
    #[test]
    fn read_csv_default() {
        let reader = CsvReader::new(data_path("penguins.csv"));
        let frame = reader.finish().unwrap();
        assert!(frame.height() > 0);
        assert!(frame.width() > 0);
    }

    // Explicit header and date-parsing flags still yield rows.
    #[test]
    fn read_csv_with_options() {
        let reader = CsvReader::new(data_path("penguins.csv"))
            .has_header(true)
            .try_parse_dates(false);
        let frame = reader.finish().unwrap();
        assert!(frame.height() > 0);
    }

    // A path that does not exist surfaces as an error rather than a panic.
    #[test]
    fn read_csv_file_not_found() {
        let outcome = CsvReader::new("nonexistent.csv").finish();
        assert!(outcome.is_err());
    }

    // Setting the separator explicitly to a comma still reads the file.
    #[test]
    fn read_csv_custom_delimiter() {
        let reader = CsvReader::new(data_path("penguins.csv")).delimiter(b',');
        let frame = reader.finish().unwrap();
        assert!(frame.height() > 0);
    }

    // Skipping two rows leaves a frame two rows shorter than the full read.
    #[test]
    fn read_csv_skip_rows() {
        let source = data_path("animal_statistics.csv");
        let full = CsvReader::new(source.clone()).finish().unwrap();
        let skipped = CsvReader::new(source).skip_rows(2).finish().unwrap();
        let expected_height = full.height() - 2;
        assert_eq!(expected_height, skipped.height());
    }

    // Registering null markers does not prevent the file from loading.
    #[test]
    fn read_csv_null_values() {
        let markers = vec!["NA", ""];
        let reader = CsvReader::new(data_path("penguins.csv")).null_values(markers);
        let frame = reader.finish().unwrap();
        assert!(frame.height() > 0);
    }
}