1pub mod parser;
8
9use std::fmt::Display;
10use std::fs;
11use std::io::Write;
12use std::path::Path;
13use std::str::FromStr;
14
15use ferray_core::Array;
16use ferray_core::dimension::Ix2;
17use ferray_core::dtype::Element;
18use ferray_core::error::{FerrayError, FerrayResult};
19
20use self::parser::{TextParseOptions, parse_text_grid, parse_text_grid_with_missing};
21
/// Formatting options consumed by [`savetxt`] and [`savetxt_to_writer`].
///
/// Use [`SaveTxtOptions::default`] together with struct-update syntax to
/// override individual fields.
#[derive(Debug, Clone)]
pub struct SaveTxtOptions {
    /// Character written between adjacent cells of a row (default: `','`).
    pub delimiter: char,
    /// Optional per-cell template. When set, every `{}` in the template is
    /// replaced by the cell's `Display` rendering; when `None`, the cell is
    /// written with `Display` directly.
    pub fmt: Option<String>,
    /// Optional text written before the data, followed by `newline`.
    pub header: Option<String>,
    /// Optional text written after the data, followed by `newline`.
    pub footer: Option<String>,
    /// Line terminator written after the header, every row, and the footer
    /// (default: `"\n"`).
    pub newline: String,
}
37
38impl Default for SaveTxtOptions {
39 fn default() -> Self {
40 Self {
41 delimiter: ',',
42 fmt: None,
43 header: None,
44 footer: None,
45 newline: "\n".to_string(),
46 }
47 }
48}
49
50pub fn savetxt<T: Element + Display, P: AsRef<Path>>(
56 path: P,
57 array: &Array<T, Ix2>,
58 opts: &SaveTxtOptions,
59) -> FerrayResult<()> {
60 let mut file = std::fs::File::create(path.as_ref()).map_err(|e| {
61 FerrayError::io_error(format!(
62 "failed to create file '{}': {e}",
63 path.as_ref().display()
64 ))
65 })?;
66
67 savetxt_to_writer(&mut file, array, opts)
68}
69
70pub fn savetxt_to_writer<T: Element + Display, W: Write>(
72 writer: &mut W,
73 array: &Array<T, Ix2>,
74 opts: &SaveTxtOptions,
75) -> FerrayResult<()> {
76 let shape = array.shape();
77 let nrows = shape[0];
78 let ncols = shape[1];
79
80 if let Some(ref header) = opts.header {
81 write!(writer, "{header}").map_err(|e| FerrayError::io_error(e.to_string()))?;
82 writer
83 .write_all(opts.newline.as_bytes())
84 .map_err(|e| FerrayError::io_error(e.to_string()))?;
85 }
86
87 let slice = array
88 .as_slice()
89 .ok_or_else(|| FerrayError::io_error("cannot save non-contiguous array as text"))?;
90
91 for row in 0..nrows {
92 for col in 0..ncols {
93 if col > 0 {
94 write!(writer, "{}", opts.delimiter)
95 .map_err(|e| FerrayError::io_error(e.to_string()))?;
96 }
97 let val = &slice[row * ncols + col];
98 if let Some(ref fmt_str) = opts.fmt {
99 let formatted = fmt_str.replace("{}", &val.to_string());
102 write!(writer, "{formatted}").map_err(|e| FerrayError::io_error(e.to_string()))?;
103 } else {
104 write!(writer, "{val}").map_err(|e| FerrayError::io_error(e.to_string()))?;
105 }
106 }
107 writer
108 .write_all(opts.newline.as_bytes())
109 .map_err(|e| FerrayError::io_error(e.to_string()))?;
110 }
111
112 if let Some(ref footer) = opts.footer {
113 write!(writer, "{footer}").map_err(|e| FerrayError::io_error(e.to_string()))?;
114 writer
115 .write_all(opts.newline.as_bytes())
116 .map_err(|e| FerrayError::io_error(e.to_string()))?;
117 }
118
119 writer
120 .flush()
121 .map_err(|e| FerrayError::io_error(e.to_string()))?;
122 Ok(())
123}
124
125pub fn loadtxt<T, P>(path: P, delimiter: char, skiprows: usize) -> FerrayResult<Array<T, Ix2>>
136where
137 T: Element + FromStr,
138 T::Err: Display,
139 P: AsRef<Path>,
140{
141 let content = fs::read_to_string(path.as_ref()).map_err(|e| {
142 FerrayError::io_error(format!(
143 "failed to read file '{}': {e}",
144 path.as_ref().display()
145 ))
146 })?;
147
148 loadtxt_from_str(&content, delimiter, skiprows)
149}
150
151pub fn loadtxt_from_str<T>(
153 content: &str,
154 delimiter: char,
155 skiprows: usize,
156) -> FerrayResult<Array<T, Ix2>>
157where
158 T: Element + FromStr,
159 T::Err: Display,
160{
161 let opts = TextParseOptions {
162 delimiter,
163 skiprows,
164 ..Default::default()
165 };
166
167 let (cells, nrows, ncols) = parse_text_grid(content, &opts)?;
168
169 if nrows == 0 {
170 return Array::from_vec(Ix2::new([0, 0]), vec![]);
171 }
172
173 let data: FerrayResult<Vec<T>> = cells
174 .iter()
175 .enumerate()
176 .map(|(i, cell)| {
177 cell.parse::<T>().map_err(|e| {
178 let row = i / ncols;
179 let col = i % ncols;
180 FerrayError::io_error(format!(
181 "failed to parse value '{cell}' at row {row}, col {col}: {e}"
182 ))
183 })
184 })
185 .collect();
186
187 let data = data?;
188 Array::from_vec(Ix2::new([nrows, ncols]), data)
189}
190
191pub fn genfromtxt<P: AsRef<Path>>(
202 path: P,
203 delimiter: char,
204 filling_value: f64,
205 skiprows: usize,
206 missing_values: &[&str],
207) -> FerrayResult<Array<f64, Ix2>> {
208 let content = fs::read_to_string(path.as_ref()).map_err(|e| {
209 FerrayError::io_error(format!(
210 "failed to read file '{}': {e}",
211 path.as_ref().display()
212 ))
213 })?;
214
215 genfromtxt_from_str(&content, delimiter, filling_value, skiprows, missing_values)
216}
217
218pub fn genfromtxt_from_str(
220 content: &str,
221 delimiter: char,
222 filling_value: f64,
223 skiprows: usize,
224 missing_values: &[&str],
225) -> FerrayResult<Array<f64, Ix2>> {
226 let opts = TextParseOptions {
227 delimiter,
228 skiprows,
229 ..Default::default()
230 };
231
232 let mut all_missing: Vec<&str> = vec!["", "NA", "N/A", "nan", "NaN", "NAN", "--", "null"];
234 for mv in missing_values {
235 if !all_missing.contains(mv) {
236 all_missing.push(mv);
237 }
238 }
239
240 let (cells, nrows, ncols) = parse_text_grid_with_missing(content, &opts, &all_missing)?;
241
242 if nrows == 0 {
243 return Array::from_vec(Ix2::new([0, 0]), vec![]);
244 }
245
246 let data: FerrayResult<Vec<f64>> = cells
247 .iter()
248 .enumerate()
249 .map(|(i, cell)| match cell {
250 None => Ok(filling_value),
251 Some(s) => s.parse::<f64>().map_err(|e| {
252 let row = i / ncols;
253 let col = i % ncols;
254 FerrayError::io_error(format!(
255 "failed to parse value '{s}' at row {row}, col {col}: {e}"
256 ))
257 }),
258 })
259 .collect();
260
261 let data = data?;
262 Array::from_vec(Ix2::new([nrows, ncols]), data)
263}
264
// Unit tests for the text load/save entry points in this module.
#[cfg(test)]
mod tests {
    use super::*;

    // Plain comma-separated floats parse into a row-major 2x3 array.
    #[test]
    fn loadtxt_simple_csv() {
        let content = "1.0,2.0,3.0\n4.0,5.0,6.0\n";
        let arr: Array<f64, Ix2> = loadtxt_from_str(content, ',', 0).unwrap();
        assert_eq!(arr.shape(), &[2, 3]);
        assert_eq!(arr.as_slice().unwrap(), &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
    }

    // With skiprows = 1 only the two numeric rows remain in the result.
    #[test]
    fn loadtxt_with_skiprows() {
        let content = "# header\nname,value\n1.0,10.0\n2.0,20.0\n";
        let arr: Array<f64, Ix2> = loadtxt_from_str(content, ',', 1).unwrap();
        assert_eq!(arr.shape(), &[2, 2]);
        assert_eq!(arr.as_slice().unwrap()[0], 1.0);
    }

    // A tab delimiter works, and the element type can be an integer.
    #[test]
    fn loadtxt_tab_delimited() {
        let content = "1\t2\t3\n4\t5\t6\n";
        let arr: Array<i32, Ix2> = loadtxt_from_str(content, '\t', 0).unwrap();
        assert_eq!(arr.shape(), &[2, 3]);
        assert_eq!(arr.as_slice().unwrap(), &[1, 2, 3, 4, 5, 6]);
    }

    // Parsing into i64 elements.
    #[test]
    fn loadtxt_integers() {
        let content = "10,20\n30,40\n";
        let arr: Array<i64, Ix2> = loadtxt_from_str(content, ',', 0).unwrap();
        assert_eq!(arr.as_slice().unwrap(), &[10i64, 20, 30, 40]);
    }

    // savetxt followed by loadtxt on a real file reproduces the data.
    #[test]
    fn loadtxt_file_roundtrip() {
        let data = vec![1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0];
        let arr = Array::<f64, Ix2>::from_vec(Ix2::new([2, 3]), data.clone()).unwrap();

        // Per-process temp directory; the same directory name is used by
        // `genfromtxt_file`, with a different file name.
        let dir = std::env::temp_dir().join(format!("ferray_io_text_{}", std::process::id()));
        let _ = std::fs::create_dir_all(&dir);
        let path = dir.join("test.csv");

        savetxt(&path, &arr, &SaveTxtOptions::default()).unwrap();
        let loaded: Array<f64, Ix2> = loadtxt(&path, ',', 0).unwrap();

        assert_eq!(loaded.shape(), &[2, 3]);
        assert_eq!(loaded.as_slice().unwrap(), &data[..]);
        // Best-effort cleanup of the file; the directory is left in place.
        let _ = std::fs::remove_file(&path);
    }

    // Overriding the delimiter replaces the default comma entirely.
    #[test]
    fn savetxt_custom_delimiter() {
        let data = vec![1.0f64, 2.0, 3.0, 4.0];
        let arr = Array::<f64, Ix2>::from_vec(Ix2::new([2, 2]), data).unwrap();

        let mut buf = Vec::new();
        let opts = SaveTxtOptions {
            delimiter: '\t',
            ..Default::default()
        };
        savetxt_to_writer(&mut buf, &arr, &opts).unwrap();
        let output = String::from_utf8(buf).unwrap();
        assert!(output.contains('\t'));
        assert!(!output.contains(','));
    }

    // Header and footer are written first and last, each followed by the newline.
    #[test]
    fn savetxt_with_header_footer() {
        let data = vec![1.0f64, 2.0];
        let arr = Array::<f64, Ix2>::from_vec(Ix2::new([1, 2]), data).unwrap();

        let mut buf = Vec::new();
        let opts = SaveTxtOptions {
            header: Some("# my header".to_string()),
            footer: Some("# end".to_string()),
            ..Default::default()
        };
        savetxt_to_writer(&mut buf, &arr, &opts).unwrap();
        let output = String::from_utf8(buf).unwrap();
        assert!(output.starts_with("# my header\n"));
        assert!(output.ends_with("# end\n"));
    }

    // Empty cells are replaced by the filling value (NaN here).
    #[test]
    fn genfromtxt_missing_nan() {
        let content = "1.0,2.0,3.0\n4.0,,6.0\n7.0,8.0,\n";
        let arr = genfromtxt_from_str(content, ',', f64::NAN, 0, &[]).unwrap();
        assert_eq!(arr.shape(), &[3, 3]);
        let slice = arr.as_slice().unwrap();
        assert_eq!(slice[0], 1.0);
        // Flat index 4 is row 1, col 1 (the empty middle cell).
        assert!(slice[4].is_nan());
        // Flat index 8 is row 2, col 2 (the empty trailing cell).
        assert!(slice[8].is_nan());
    }

    // "NA" cells are replaced by the supplied filling value.
    #[test]
    fn genfromtxt_na_marker() {
        let content = "1.0,NA,3.0\n4.0,5.0,NA\n";
        let arr = genfromtxt_from_str(content, ',', -999.0, 0, &["NA"]).unwrap();
        assert_eq!(arr.shape(), &[2, 3]);
        let slice = arr.as_slice().unwrap();
        assert_eq!(slice[1], -999.0);
        assert_eq!(slice[5], -999.0);
    }

    // skiprows = 1 drops the textual column-name line.
    #[test]
    fn genfromtxt_with_skiprows() {
        let content = "col1,col2\n1.0,2.0\n3.0,4.0\n";
        let arr = genfromtxt_from_str(content, ',', f64::NAN, 1, &[]).unwrap();
        assert_eq!(arr.shape(), &[2, 2]);
        assert_eq!(arr.as_slice().unwrap()[0], 1.0);
    }

    // The path-based entry point reads a real file and fills the empty cell.
    #[test]
    fn genfromtxt_file() {
        let content = "1.0,2.0\n,4.0\n";
        // Same per-process temp directory name as `loadtxt_file_roundtrip`,
        // different file name.
        let dir = std::env::temp_dir().join(format!("ferray_io_text_{}", std::process::id()));
        let _ = std::fs::create_dir_all(&dir);
        let path = dir.join("genfromtxt_test.csv");
        std::fs::write(&path, content).unwrap();

        let arr = genfromtxt(&path, ',', f64::NAN, 0, &[]).unwrap();
        assert_eq!(arr.shape(), &[2, 2]);
        // Flat index 2 is row 1, col 0 (the empty leading cell).
        assert!(arr.as_slice().unwrap()[2].is_nan());
        // Best-effort cleanup of the file; the directory is left in place.
        let _ = std::fs::remove_file(&path);
    }

    // Empty input yields a 0x0 array rather than an error.
    #[test]
    fn loadtxt_empty() {
        let content = "";
        let arr: Array<f64, Ix2> = loadtxt_from_str(content, ',', 0).unwrap();
        assert_eq!(arr.shape(), &[0, 0]);
    }
}