// blackjack/dataframe/io.rs
use std::ffi::OsStr;
use std::path::Path;

use crate::prelude::*;

10#[derive(Clone)]
25pub struct Reader {
26 path: String,
27 delimiter: u8,
28 terminator: csv::Terminator,
29 quote: u8,
30 has_headers: bool,
31 header: Option<Vec<String>>,
32}
33
34#[derive(Clone)]
49pub struct Writer {
50 path: String,
51 delimiter: u8,
52 terminator: csv::Terminator,
53 quote: u8,
54 has_headers: bool,
55}
56
57impl Reader {
58 pub fn new<S: AsRef<OsStr> + ToString>(path: &S) -> Self {
60 Reader {
61 path: path.to_string(),
62 delimiter: b',',
63 terminator: csv::Terminator::CRLF,
64 quote: b'"',
65 has_headers: true,
66 header: None,
67 }
68 }
69
70 pub fn headers(self, header: Vec<String>) -> Self {
72 let mut rdr = self;
73 rdr.header = Some(header);
74 rdr
75 }
76
77 pub fn has_headers(self, yes: bool) -> Self {
79 let mut rdr = self;
80 rdr.has_headers = yes;
81 rdr
82 }
83
84 pub fn quote(self, quote: u8) -> Self {
86 let mut rdr = self;
87 rdr.quote = quote;
88 rdr
89 }
90
91 pub fn delimiter(self, delimiter: u8) -> Self {
93 let mut rdr = self;
94 rdr.delimiter = delimiter;
95 rdr
96 }
97
98 pub fn terminator(self, terminator: u8) -> Self {
100 let mut rdr = self;
101 rdr.terminator = csv::Terminator::Any(terminator);
102 rdr
103 }
104
105 pub fn read(&self) -> Result<DataFrame<i32>, BlackJackError> {
108 use flate2::read::GzDecoder;
109 use std::fs::File;
110 use std::io::prelude::*;
111
112 let p = Path::new(&self.path);
113 let file_reader: Box<Read> = if self.path.to_string().to_lowercase().ends_with(".gz") {
114 Box::new(GzDecoder::new(File::open(p)?))
116 } else {
117 Box::new(File::open(p)?)
119 };
120
121 let mut reader = csv::ReaderBuilder::new()
122 .quote(self.quote)
123 .has_headers(self.has_headers)
124 .delimiter(self.delimiter)
125 .terminator(self.terminator)
126 .from_reader(file_reader);
127
128 let headers: Vec<String> = if self.has_headers {
129 reader
130 .headers()?
131 .clone()
132 .into_iter()
133 .map(|v| v.to_string())
134 .collect()
135 } else {
136 match &self.header {
137 Some(header) => header.to_owned(),
138 None => {
139 return Err(BlackJackError::ValueError(
140 r#"Reader specifies file does not have headers,
141 but no headers were supplied with Reader::header()"#
142 .to_owned(),
143 ));
144 }
145 }
146 };
147
148 let mut vecs: Vec<Vec<String>> = (0..headers.len()).map(|_| Vec::new()).collect();
150
151 for record in reader.records() {
152 match record {
153 Ok(rec) => {
154 for (field, container) in rec.iter().zip(&mut vecs) {
155 container.push(field.into());
156 }
157 }
158
159 Err(err) => println!("Unable to read record: '{}'", err),
161 }
162 }
163
164 let mut df = DataFrame::new();
165
166 let _ = headers
170 .into_iter()
171 .zip(vecs)
172 .map(|(header, vec)| {
173 let mut series = Series::from_vec(vec);
174 series.set_name(&header);
175 if let Ok(ser) = series.astype::<i32>() {
176 df.add_column(ser).unwrap();
177 } else if let Ok(ser) = series.astype::<f32>() {
178 df.add_column(ser).unwrap()
179 } else {
180 df.add_column(series).unwrap()
181 }
182 })
183 .collect::<Vec<()>>();
184 Ok(df)
185 }
186}
187
188impl Writer {
189 pub fn new<S: AsRef<OsStr> + ToString>(path: &S) -> Self {
191 Writer {
192 path: path.to_string(),
193 delimiter: b',',
194 terminator: csv::Terminator::CRLF,
195 quote: b'"',
196 has_headers: true,
197 }
198 }
199
200 pub fn has_headers(self, yes: bool) -> Self {
202 let mut wtr = self;
203 wtr.has_headers = yes;
204 wtr
205 }
206
207 pub fn quote(self, quote: u8) -> Self {
209 let mut wtr = self;
210 wtr.quote = quote;
211 wtr
212 }
213
214 pub fn delimiter(self, delimiter: u8) -> Self {
216 let mut wtr = self;
217 wtr.delimiter = delimiter;
218 wtr
219 }
220
221 pub fn terminator(self, terminator: u8) -> Self {
223 let mut wtr = self;
224 wtr.terminator = csv::Terminator::Any(terminator);
225 wtr
226 }
227
228 pub fn write<I: PartialEq + PartialOrd + BlackJackData>(
231 &self,
232 df: DataFrame<I>,
233 ) -> Result<(), BlackJackError> {
234 use flate2::read::GzEncoder;
235 use flate2::Compression;
236 use std::fs::File;
237 use std::io::prelude::*;
238
239 let p = Path::new(&self.path);
240
241 let file_writer: Box<Write> = if self.path.to_string().to_lowercase().ends_with(".gz") {
242 Box::new(GzEncoder::new(File::create(p)?, Compression::default()))
244 } else {
245 Box::new(File::create(p)?)
247 };
248
249 let mut writer = csv::WriterBuilder::new()
250 .delimiter(self.delimiter)
251 .has_headers(self.has_headers)
252 .quote(self.quote)
253 .terminator(self.terminator)
254 .from_writer(file_writer);
255
256 let header = df.columns().map(|v| v.to_string()).collect::<Vec<String>>();
257
258 let mut data = vec![];
260 for col_name in df.data.keys() {
261 let series_container = df.get_column_infer(col_name.as_str()).unwrap();
262 let string_vec = series_container.into_string_vec();
263 data.push(string_vec);
264 }
265
266 if self.has_headers {
268 writer.write_record(header.as_slice())?;
270 };
271
272 for row_idx in 0..data[0].len() {
274 let mut row = vec![];
275 for column_idx in 0..data.len() {
276 row.push(&data[column_idx][row_idx]);
277 }
278 writer.write_record(row.as_slice())?;
279 }
280
281 Ok(())
282 }
283}