1pub mod parser;
8
9use std::fmt::Display;
10use std::fs;
11use std::io::Write;
12use std::path::Path;
13use std::str::FromStr;
14
15use ferray_core::Array;
16use ferray_core::dimension::Ix2;
17use ferray_core::dtype::Element;
18use ferray_core::error::{FerrayError, FerrayResult};
19
20use self::parser::{TextParseOptions, parse_text_grid, parse_text_grid_with_missing};
21
/// Layout options for [`savetxt`] and [`savetxt_to_writer`].
#[derive(Debug, Clone)]
pub struct SaveTxtOptions {
    /// Character written between adjacent values of a row.
    pub delimiter: char,
    /// Optional per-value format string; `None` means each value is written
    /// with its plain `Display` output.
    pub fmt: Option<String>,
    /// Optional text written (followed by `newline`) before the first row.
    pub header: Option<String>,
    /// Optional text written (followed by `newline`) after the last row.
    pub footer: Option<String>,
    /// Terminator written after every row, and after the header and footer.
    pub newline: String,
}

impl Default for SaveTxtOptions {
    /// Comma-delimited, `"\n"`-terminated output with no format string,
    /// header, or footer.
    fn default() -> Self {
        let newline = String::from("\n");
        SaveTxtOptions {
            delimiter: ',',
            fmt: None,
            header: None,
            footer: None,
            newline,
        }
    }
}
49
/// Renders one value according to a user-supplied format string.
///
/// Two families of format strings are understood:
///
/// * Rust style precision specs: `{:.N}` (fixed), `{:.Ne}` and `{:.NE}`
///   (scientific, with the exponent marker in the requested case). Any other
///   string containing `{}` has every `{}` replaced by the value's `Display`
///   text.
/// * printf style: `%e`, `%E`, `%f` and `%g`, optionally with a `.N`
///   precision (for example `%.6f`). Anything before the `.` (such as a
///   width) is ignored, and `%g` is rendered like `%f`.
///
/// Numeric formatting works on the value's `Display` text re-parsed as `f64`.
/// Whenever the format string cannot be interpreted, or the value is not
/// parseable as `f64`, the plain `Display` text is returned so the output
/// still contains the data rather than the format string itself.
fn format_value<T: Display>(val: &T, fmt_str: &str) -> String {
    // Float notation requested by the format string.
    #[derive(Clone, Copy)]
    enum Notation {
        Fixed,
        LowerExp,
        UpperExp,
    }

    // Renders `v` in the requested notation with an optional digit count.
    fn render(v: f64, precision: Option<usize>, notation: Notation) -> String {
        match (notation, precision) {
            (Notation::Fixed, Some(p)) => format!("{v:.p$}"),
            (Notation::Fixed, None) => v.to_string(),
            (Notation::LowerExp, Some(p)) => format!("{v:.p$e}"),
            (Notation::LowerExp, None) => format!("{v:e}"),
            (Notation::UpperExp, Some(p)) => format!("{v:.p$E}"),
            (Notation::UpperExp, None) => format!("{v:E}"),
        }
    }

    let plain = val.to_string();

    if fmt_str.contains('{') {
        let precision_spec = fmt_str
            .strip_prefix("{:.")
            .and_then(|rest| rest.strip_suffix('}'));

        if let Some(spec) = precision_spec {
            // A trailing `e`/`E` selects scientific notation and keeps its case.
            let (digits, notation) = if let Some(d) = spec.strip_suffix('e') {
                (d, Notation::LowerExp)
            } else if let Some(d) = spec.strip_suffix('E') {
                (d, Notation::UpperExp)
            } else {
                (spec, Notation::Fixed)
            };

            if let (Ok(p), Ok(v)) = (digits.parse::<usize>(), plain.parse::<f64>()) {
                return render(v, Some(p), notation);
            }
        }

        // Without a `{}` placeholder there is nothing to substitute; emitting
        // the raw format string would silently drop the value.
        return if fmt_str.contains("{}") {
            fmt_str.replace("{}", &plain)
        } else {
            plain
        };
    }

    if let Some(spec) = fmt_str.strip_prefix('%') {
        let notation = match spec.chars().last() {
            Some('e') => Some(Notation::LowerExp),
            Some('E') => Some(Notation::UpperExp),
            Some('f') | Some('g') => Some(Notation::Fixed),
            _ => None,
        };

        if let (Some(notation), Ok(v)) = (notation, plain.parse::<f64>()) {
            // The conversion character is ASCII, so dropping one byte is safe.
            let body = &spec[..spec.len() - 1];
            match body.split_once('.') {
                Some((_width, digits)) => {
                    if let Ok(p) = digits.parse::<usize>() {
                        return render(v, Some(p), notation);
                    }
                }
                None if body.is_empty() => return render(v, None, notation),
                None => {}
            }
        }
    }

    plain
}
124
125pub fn savetxt<T: Element + Display, P: AsRef<Path>>(
136 path: P,
137 array: &Array<T, Ix2>,
138 opts: &SaveTxtOptions,
139) -> FerrayResult<()> {
140 let mut file = std::fs::File::create(path.as_ref()).map_err(|e| {
141 FerrayError::io_error(format!(
142 "failed to create file '{}': {e}",
143 path.as_ref().display()
144 ))
145 })?;
146
147 savetxt_to_writer(&mut file, array, opts)
148}
149
150pub fn savetxt_to_writer<T: Element + Display, W: Write>(
152 writer: &mut W,
153 array: &Array<T, Ix2>,
154 opts: &SaveTxtOptions,
155) -> FerrayResult<()> {
156 let shape = array.shape();
157 let nrows = shape[0];
158 let ncols = shape[1];
159
160 if let Some(ref header) = opts.header {
161 write!(writer, "{header}").map_err(|e| FerrayError::io_error(e.to_string()))?;
162 writer
163 .write_all(opts.newline.as_bytes())
164 .map_err(|e| FerrayError::io_error(e.to_string()))?;
165 }
166
167 let slice = array
168 .as_slice()
169 .ok_or_else(|| FerrayError::io_error("cannot save non-contiguous array as text"))?;
170
171 for row in 0..nrows {
172 for col in 0..ncols {
173 if col > 0 {
174 write!(writer, "{}", opts.delimiter)
175 .map_err(|e| FerrayError::io_error(e.to_string()))?;
176 }
177 let val = &slice[row * ncols + col];
178 if let Some(ref fmt_str) = opts.fmt {
179 let formatted = format_value(val, fmt_str);
185 write!(writer, "{formatted}").map_err(|e| FerrayError::io_error(e.to_string()))?;
186 } else {
187 write!(writer, "{val}").map_err(|e| FerrayError::io_error(e.to_string()))?;
188 }
189 }
190 writer
191 .write_all(opts.newline.as_bytes())
192 .map_err(|e| FerrayError::io_error(e.to_string()))?;
193 }
194
195 if let Some(ref footer) = opts.footer {
196 write!(writer, "{footer}").map_err(|e| FerrayError::io_error(e.to_string()))?;
197 writer
198 .write_all(opts.newline.as_bytes())
199 .map_err(|e| FerrayError::io_error(e.to_string()))?;
200 }
201
202 writer
203 .flush()
204 .map_err(|e| FerrayError::io_error(e.to_string()))?;
205 Ok(())
206}
207
208pub fn loadtxt<T, P>(path: P, delimiter: char, skiprows: usize) -> FerrayResult<Array<T, Ix2>>
219where
220 T: Element + FromStr,
221 T::Err: Display,
222 P: AsRef<Path>,
223{
224 let content = fs::read_to_string(path.as_ref()).map_err(|e| {
225 FerrayError::io_error(format!(
226 "failed to read file '{}': {e}",
227 path.as_ref().display()
228 ))
229 })?;
230
231 loadtxt_from_str(&content, delimiter, skiprows)
232}
233
234pub fn loadtxt_from_str<T>(
236 content: &str,
237 delimiter: char,
238 skiprows: usize,
239) -> FerrayResult<Array<T, Ix2>>
240where
241 T: Element + FromStr,
242 T::Err: Display,
243{
244 let opts = TextParseOptions {
245 delimiter,
246 skiprows,
247 ..Default::default()
248 };
249
250 let (cells, nrows, ncols) = parse_text_grid(content, &opts)?;
251
252 if nrows == 0 {
253 return Array::from_vec(Ix2::new([0, 0]), vec![]);
254 }
255
256 let data: FerrayResult<Vec<T>> = cells
257 .iter()
258 .enumerate()
259 .map(|(i, cell)| {
260 cell.parse::<T>().map_err(|e| {
261 let row = i / ncols;
262 let col = i % ncols;
263 FerrayError::io_error(format!(
264 "failed to parse value '{cell}' at row {row}, col {col}: {e}"
265 ))
266 })
267 })
268 .collect();
269
270 let data = data?;
271 Array::from_vec(Ix2::new([nrows, ncols]), data)
272}
273
274pub fn genfromtxt<P: AsRef<Path>>(
285 path: P,
286 delimiter: char,
287 filling_value: f64,
288 skiprows: usize,
289 missing_values: &[&str],
290) -> FerrayResult<Array<f64, Ix2>> {
291 let content = fs::read_to_string(path.as_ref()).map_err(|e| {
292 FerrayError::io_error(format!(
293 "failed to read file '{}': {e}",
294 path.as_ref().display()
295 ))
296 })?;
297
298 genfromtxt_from_str(&content, delimiter, filling_value, skiprows, missing_values)
299}
300
301pub fn genfromtxt_from_str(
303 content: &str,
304 delimiter: char,
305 filling_value: f64,
306 skiprows: usize,
307 missing_values: &[&str],
308) -> FerrayResult<Array<f64, Ix2>> {
309 let opts = TextParseOptions {
310 delimiter,
311 skiprows,
312 ..Default::default()
313 };
314
315 let mut all_missing: Vec<&str> = vec!["", "NA", "N/A", "nan", "NaN", "NAN", "--", "null"];
317 for mv in missing_values {
318 if !all_missing.contains(mv) {
319 all_missing.push(mv);
320 }
321 }
322
323 let (cells, nrows, ncols) = parse_text_grid_with_missing(content, &opts, &all_missing)?;
324
325 if nrows == 0 {
326 return Array::from_vec(Ix2::new([0, 0]), vec![]);
327 }
328
329 let data: FerrayResult<Vec<f64>> = cells
330 .iter()
331 .enumerate()
332 .map(|(i, cell)| match cell {
333 None => Ok(filling_value),
334 Some(s) => s.parse::<f64>().map_err(|e| {
335 let row = i / ncols;
336 let col = i % ncols;
337 FerrayError::io_error(format!(
338 "failed to parse value '{s}' at row {row}, col {col}: {e}"
339 ))
340 }),
341 })
342 .collect();
343
344 let data = data?;
345 Array::from_vec(Ix2::new([nrows, ncols]), data)
346}
347
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a scratch directory unique to this process and to the named
    /// test, so file-based tests running in parallel never share (or delete)
    /// each other's files.
    fn scratch_dir(test_name: &str) -> std::path::PathBuf {
        let dir = std::env::temp_dir().join(format!(
            "ferray_io_text_{}_{test_name}",
            std::process::id()
        ));
        std::fs::create_dir_all(&dir).unwrap();
        dir
    }

    #[test]
    fn loadtxt_simple_csv() {
        let content = "1.0,2.0,3.0\n4.0,5.0,6.0\n";
        let arr: Array<f64, Ix2> = loadtxt_from_str(content, ',', 0).unwrap();
        assert_eq!(arr.shape(), &[2, 3]);
        assert_eq!(arr.as_slice().unwrap(), &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
    }

    #[test]
    fn loadtxt_with_skiprows() {
        let content = "# header\nname,value\n1.0,10.0\n2.0,20.0\n";
        let arr: Array<f64, Ix2> = loadtxt_from_str(content, ',', 1).unwrap();
        assert_eq!(arr.shape(), &[2, 2]);
        assert_eq!(arr.as_slice().unwrap()[0], 1.0);
    }

    #[test]
    fn loadtxt_tab_delimited() {
        let content = "1\t2\t3\n4\t5\t6\n";
        let arr: Array<i32, Ix2> = loadtxt_from_str(content, '\t', 0).unwrap();
        assert_eq!(arr.shape(), &[2, 3]);
        assert_eq!(arr.as_slice().unwrap(), &[1, 2, 3, 4, 5, 6]);
    }

    #[test]
    fn loadtxt_integers() {
        let content = "10,20\n30,40\n";
        let arr: Array<i64, Ix2> = loadtxt_from_str(content, ',', 0).unwrap();
        assert_eq!(arr.as_slice().unwrap(), &[10i64, 20, 30, 40]);
    }

    #[test]
    fn loadtxt_file_roundtrip() {
        let data = vec![1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0];
        let arr = Array::<f64, Ix2>::from_vec(Ix2::new([2, 3]), data.clone()).unwrap();

        let dir = scratch_dir("loadtxt_file_roundtrip");
        let path = dir.join("test.csv");

        savetxt(&path, &arr, &SaveTxtOptions::default()).unwrap();
        let loaded: Array<f64, Ix2> = loadtxt(&path, ',', 0).unwrap();

        assert_eq!(loaded.shape(), &[2, 3]);
        assert_eq!(loaded.as_slice().unwrap(), &data[..]);
        // Best-effort cleanup of the file and its private directory.
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn savetxt_custom_delimiter() {
        let data = vec![1.0f64, 2.0, 3.0, 4.0];
        let arr = Array::<f64, Ix2>::from_vec(Ix2::new([2, 2]), data).unwrap();

        let mut buf = Vec::new();
        let opts = SaveTxtOptions {
            delimiter: '\t',
            ..Default::default()
        };
        savetxt_to_writer(&mut buf, &arr, &opts).unwrap();
        let output = String::from_utf8(buf).unwrap();
        assert!(output.contains('\t'));
        assert!(!output.contains(','));
    }

    #[test]
    fn savetxt_with_header_footer() {
        let data = vec![1.0f64, 2.0];
        let arr = Array::<f64, Ix2>::from_vec(Ix2::new([1, 2]), data).unwrap();

        let mut buf = Vec::new();
        let opts = SaveTxtOptions {
            header: Some("# my header".to_string()),
            footer: Some("# end".to_string()),
            ..Default::default()
        };
        savetxt_to_writer(&mut buf, &arr, &opts).unwrap();
        let output = String::from_utf8(buf).unwrap();
        assert!(output.starts_with("# my header\n"));
        assert!(output.ends_with("# end\n"));
    }

    #[test]
    fn genfromtxt_missing_nan() {
        let content = "1.0,2.0,3.0\n4.0,,6.0\n7.0,8.0,\n";
        let arr = genfromtxt_from_str(content, ',', f64::NAN, 0, &[]).unwrap();
        assert_eq!(arr.shape(), &[3, 3]);
        let slice = arr.as_slice().unwrap();
        assert_eq!(slice[0], 1.0);
        // Row 1, col 1 and row 2, col 2 are the empty cells.
        assert!(slice[4].is_nan());
        assert!(slice[8].is_nan());
    }

    #[test]
    fn genfromtxt_na_marker() {
        let content = "1.0,NA,3.0\n4.0,5.0,NA\n";
        let arr = genfromtxt_from_str(content, ',', -999.0, 0, &["NA"]).unwrap();
        assert_eq!(arr.shape(), &[2, 3]);
        let slice = arr.as_slice().unwrap();
        assert_eq!(slice[1], -999.0);
        assert_eq!(slice[5], -999.0);
    }

    #[test]
    fn genfromtxt_with_skiprows() {
        let content = "col1,col2\n1.0,2.0\n3.0,4.0\n";
        let arr = genfromtxt_from_str(content, ',', f64::NAN, 1, &[]).unwrap();
        assert_eq!(arr.shape(), &[2, 2]);
        assert_eq!(arr.as_slice().unwrap()[0], 1.0);
    }

    #[test]
    fn genfromtxt_file() {
        let content = "1.0,2.0\n,4.0\n";
        let dir = scratch_dir("genfromtxt_file");
        let path = dir.join("genfromtxt_test.csv");
        std::fs::write(&path, content).unwrap();

        let arr = genfromtxt(&path, ',', f64::NAN, 0, &[]).unwrap();
        assert_eq!(arr.shape(), &[2, 2]);
        assert!(arr.as_slice().unwrap()[2].is_nan());
        // Best-effort cleanup of the file and its private directory.
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn loadtxt_empty() {
        let content = "";
        let arr: Array<f64, Ix2> = loadtxt_from_str(content, ',', 0).unwrap();
        assert_eq!(arr.shape(), &[0, 0]);
    }
}