1pub mod parser;
8
9use std::fmt::Display;
10use std::fs;
11use std::io::Write;
12use std::path::Path;
13use std::str::FromStr;
14
15use ferray_core::Array;
16use ferray_core::dimension::{Ix1, Ix2};
17use ferray_core::dtype::Element;
18use ferray_core::error::{FerrayError, FerrayResult};
19
20use self::parser::{TextParseOptions, parse_text_grid, parse_text_grid_with_missing};
21
/// Formatting options shared by the `savetxt*` family of functions.
#[derive(Clone, Debug)]
pub struct SaveTxtOptions {
    /// Character written between adjacent columns of a 2-D array.
    pub delimiter: char,
    /// Optional per-value format string handed to the value formatter;
    /// when `None`, each value is written with its `Display` implementation.
    pub fmt: Option<String>,
    /// Optional text written before the data, followed by `newline`.
    pub header: Option<String>,
    /// Optional text written after the data, followed by `newline`.
    pub footer: Option<String>,
    /// Line terminator written after the header, every data line and the footer.
    pub newline: String,
}
37
38impl Default for SaveTxtOptions {
39 fn default() -> Self {
40 Self {
41 delimiter: ',',
42 fmt: None,
43 header: None,
44 footer: None,
45 newline: "\n".to_string(),
46 }
47 }
48}
49
/// Renders `val` according to a small subset of Rust- and C-style format strings.
///
/// Recognised forms of `fmt_str`:
///
/// * `{:.N}`, `{:.Ne}`, `{:.NE}`, `{:e}`, `{:E}` — the value is re-parsed as
///   `f64` and printed in fixed or scientific notation; the case of the
///   exponent marker follows the spec.
/// * any other string containing `{}` — every `{}` is replaced by the value's
///   `Display` output (e.g. `"v={}"`).
/// * `%.Nf`, `%.Ne`, `%.NE`, `%.Ng`, `%f`, `%e`, `%E`, `%g` — C-style specs.
///   `g` is rendered like `f`, and a width before the `.` is accepted but
///   ignored. Output uses Rust's float formatting (e.g. `1.5e3`, not `1.5e+03`).
///
/// Anything that cannot be interpreted — an unsupported spec, or a numeric spec
/// applied to a value whose `Display` output does not parse as `f64` — falls
/// back to the value's plain `Display` output, so the value is never replaced
/// by the literal format string.
fn format_value<T: Display>(val: &T, fmt_str: &str) -> String {
    let plain = val.to_string();

    if fmt_str.contains('{') {
        // A template that consists of exactly one `{:spec}` placeholder.
        let spec = fmt_str
            .strip_prefix("{:")
            .and_then(|s| s.strip_suffix('}'));
        if let Some(rendered) = spec.and_then(|spec| render_brace_spec(&plain, spec)) {
            return rendered;
        }
        // Generic templates: substitute `{}`. Without any `{}` there is nothing
        // to substitute, so emit the value itself rather than the template.
        return if fmt_str.contains("{}") {
            fmt_str.replace("{}", &plain)
        } else {
            plain
        };
    }

    if let Some(rendered) = fmt_str
        .strip_prefix('%')
        .and_then(|spec| render_percent_spec(&plain, spec))
    {
        return rendered;
    }

    plain
}

/// Interprets the inside of a `{:spec}` placeholder; `None` if unsupported.
fn render_brace_spec(plain: &str, spec: &str) -> Option<String> {
    let (body, mode) = split_notation(spec, &['e', 'E']).unwrap_or((spec, 'f'));
    let precision = match body.strip_prefix('.') {
        Some(digits) => Some(digits.parse::<usize>().ok()?),
        // `{:e}` / `{:E}`: scientific notation with default precision.
        None if body.is_empty() && mode != 'f' => None,
        None => return None,
    };
    let v = plain.parse::<f64>().ok()?;
    Some(render_float(v, precision, mode))
}

/// Interprets the part of a C-style spec after `%`; `None` if unsupported.
fn render_percent_spec(plain: &str, spec: &str) -> Option<String> {
    let (body, mode) = split_notation(spec, &['e', 'E', 'f', 'g'])?;
    let v = plain.parse::<f64>().ok()?;
    match body.split_once('.') {
        // Anything before the dot (a width) is ignored.
        Some((_, digits)) => digits
            .parse::<usize>()
            .ok()
            .map(|p| render_float(v, Some(p), mode)),
        None if body.is_empty() => Some(render_float(v, None, mode)),
        None => None,
    }
}

/// Splits a trailing notation letter (one of `letters`) off `spec`.
fn split_notation<'a>(spec: &'a str, letters: &[char]) -> Option<(&'a str, char)> {
    let letter = spec.chars().next_back().filter(|c| letters.contains(c))?;
    Some((&spec[..spec.len() - letter.len_utf8()], letter))
}

/// Formats `v` in scientific (`'e'` / `'E'`) or fixed-point (any other `mode`)
/// notation, with `precision` digits after the decimal point when given.
fn render_float(v: f64, precision: Option<usize>, mode: char) -> String {
    match (mode, precision) {
        ('e', Some(p)) => format!("{v:.p$e}"),
        ('E', Some(p)) => format!("{v:.p$E}"),
        (_, Some(p)) => format!("{v:.p$}"),
        ('e', None) => format!("{v:e}"),
        ('E', None) => format!("{v:E}"),
        (_, None) => format!("{v}"),
    }
}
124
125pub fn savetxt<T: Element + Display, P: AsRef<Path>>(
136 path: P,
137 array: &Array<T, Ix2>,
138 opts: &SaveTxtOptions,
139) -> FerrayResult<()> {
140 let mut file = std::fs::File::create(path.as_ref()).map_err(|e| {
141 FerrayError::io_error(format!(
142 "failed to create file '{}': {e}",
143 path.as_ref().display()
144 ))
145 })?;
146
147 savetxt_to_writer(&mut file, array, opts)
148}
149
150pub fn savetxt_1d<T: Element + Display, P: AsRef<Path>>(
159 path: P,
160 array: &Array<T, Ix1>,
161 opts: &SaveTxtOptions,
162) -> FerrayResult<()> {
163 let mut file = std::fs::File::create(path.as_ref()).map_err(|e| {
164 FerrayError::io_error(format!(
165 "failed to create file '{}': {e}",
166 path.as_ref().display()
167 ))
168 })?;
169 savetxt_1d_to_writer(&mut file, array, opts)
170}
171
172pub fn savetxt_1d_to_writer<T: Element + Display, W: Write>(
174 writer: &mut W,
175 array: &Array<T, Ix1>,
176 opts: &SaveTxtOptions,
177) -> FerrayResult<()> {
178 if let Some(ref header) = opts.header {
179 write!(writer, "{header}").map_err(|e| FerrayError::io_error(e.to_string()))?;
180 writer
181 .write_all(opts.newline.as_bytes())
182 .map_err(|e| FerrayError::io_error(e.to_string()))?;
183 }
184 let slice = array
185 .as_slice()
186 .ok_or_else(|| FerrayError::io_error("cannot save non-contiguous array as text"))?;
187 for val in slice {
188 let formatted = if let Some(ref fmt_str) = opts.fmt {
189 format_value(val, fmt_str)
190 } else {
191 format!("{val}")
192 };
193 writer
194 .write_all(formatted.as_bytes())
195 .map_err(|e| FerrayError::io_error(e.to_string()))?;
196 writer
197 .write_all(opts.newline.as_bytes())
198 .map_err(|e| FerrayError::io_error(e.to_string()))?;
199 }
200 if let Some(ref footer) = opts.footer {
201 write!(writer, "{footer}").map_err(|e| FerrayError::io_error(e.to_string()))?;
202 writer
203 .write_all(opts.newline.as_bytes())
204 .map_err(|e| FerrayError::io_error(e.to_string()))?;
205 }
206 writer
207 .flush()
208 .map_err(|e| FerrayError::io_error(e.to_string()))?;
209 Ok(())
210}
211
212pub fn loadtxt_1d<T, P>(path: P, delimiter: char, skiprows: usize) -> FerrayResult<Array<T, Ix1>>
221where
222 T: Element + FromStr,
223 T::Err: Display,
224 P: AsRef<Path>,
225{
226 let arr2 = loadtxt::<T, _>(path, delimiter, skiprows)?;
227 let shape = arr2.shape();
228 let n = shape[0] * shape[1];
229 let data: Vec<T> = arr2.iter().cloned().collect();
230 Array::<T, Ix1>::from_vec(Ix1::new([n]), data)
231}
232
233pub fn savetxt_to_writer<T: Element + Display, W: Write>(
235 writer: &mut W,
236 array: &Array<T, Ix2>,
237 opts: &SaveTxtOptions,
238) -> FerrayResult<()> {
239 let shape = array.shape();
240 let nrows = shape[0];
241 let ncols = shape[1];
242
243 if let Some(ref header) = opts.header {
244 write!(writer, "{header}").map_err(|e| FerrayError::io_error(e.to_string()))?;
245 writer
246 .write_all(opts.newline.as_bytes())
247 .map_err(|e| FerrayError::io_error(e.to_string()))?;
248 }
249
250 let slice = array
251 .as_slice()
252 .ok_or_else(|| FerrayError::io_error("cannot save non-contiguous array as text"))?;
253
254 for row in 0..nrows {
255 for col in 0..ncols {
256 if col > 0 {
257 write!(writer, "{}", opts.delimiter)
258 .map_err(|e| FerrayError::io_error(e.to_string()))?;
259 }
260 let val = &slice[row * ncols + col];
261 if let Some(ref fmt_str) = opts.fmt {
262 let formatted = format_value(val, fmt_str);
268 write!(writer, "{formatted}").map_err(|e| FerrayError::io_error(e.to_string()))?;
269 } else {
270 write!(writer, "{val}").map_err(|e| FerrayError::io_error(e.to_string()))?;
271 }
272 }
273 writer
274 .write_all(opts.newline.as_bytes())
275 .map_err(|e| FerrayError::io_error(e.to_string()))?;
276 }
277
278 if let Some(ref footer) = opts.footer {
279 write!(writer, "{footer}").map_err(|e| FerrayError::io_error(e.to_string()))?;
280 writer
281 .write_all(opts.newline.as_bytes())
282 .map_err(|e| FerrayError::io_error(e.to_string()))?;
283 }
284
285 writer
286 .flush()
287 .map_err(|e| FerrayError::io_error(e.to_string()))?;
288 Ok(())
289}
290
291pub fn loadtxt<T, P>(path: P, delimiter: char, skiprows: usize) -> FerrayResult<Array<T, Ix2>>
302where
303 T: Element + FromStr,
304 T::Err: Display,
305 P: AsRef<Path>,
306{
307 let content = fs::read_to_string(path.as_ref()).map_err(|e| {
308 FerrayError::io_error(format!(
309 "failed to read file '{}': {e}",
310 path.as_ref().display()
311 ))
312 })?;
313
314 loadtxt_from_str(&content, delimiter, skiprows)
315}
316
317pub fn loadtxt_from_str<T>(
319 content: &str,
320 delimiter: char,
321 skiprows: usize,
322) -> FerrayResult<Array<T, Ix2>>
323where
324 T: Element + FromStr,
325 T::Err: Display,
326{
327 let opts = TextParseOptions {
328 delimiter,
329 skiprows,
330 ..Default::default()
331 };
332
333 let (cells, nrows, ncols) = parse_text_grid(content, &opts)?;
334
335 if nrows == 0 {
336 return Array::from_vec(Ix2::new([0, 0]), vec![]);
337 }
338
339 let data: FerrayResult<Vec<T>> = cells
340 .iter()
341 .enumerate()
342 .map(|(i, cell)| {
343 cell.parse::<T>().map_err(|e| {
344 let row = i / ncols;
345 let col = i % ncols;
346 FerrayError::io_error(format!(
347 "failed to parse value '{cell}' at row {row}, col {col}: {e}"
348 ))
349 })
350 })
351 .collect();
352
353 let data = data?;
354 Array::from_vec(Ix2::new([nrows, ncols]), data)
355}
356
357pub fn genfromtxt<P: AsRef<Path>>(
368 path: P,
369 delimiter: char,
370 filling_value: f64,
371 skiprows: usize,
372 missing_values: &[&str],
373) -> FerrayResult<Array<f64, Ix2>> {
374 let content = fs::read_to_string(path.as_ref()).map_err(|e| {
375 FerrayError::io_error(format!(
376 "failed to read file '{}': {e}",
377 path.as_ref().display()
378 ))
379 })?;
380
381 genfromtxt_from_str(&content, delimiter, filling_value, skiprows, missing_values)
382}
383
384pub fn genfromtxt_from_str(
386 content: &str,
387 delimiter: char,
388 filling_value: f64,
389 skiprows: usize,
390 missing_values: &[&str],
391) -> FerrayResult<Array<f64, Ix2>> {
392 let opts = TextParseOptions {
393 delimiter,
394 skiprows,
395 ..Default::default()
396 };
397
398 let mut all_missing: Vec<&str> = vec!["", "NA", "N/A", "nan", "NaN", "NAN", "--", "null"];
400 for mv in missing_values {
401 if !all_missing.contains(mv) {
402 all_missing.push(mv);
403 }
404 }
405
406 let (cells, nrows, ncols) = parse_text_grid_with_missing(content, &opts, &all_missing)?;
407
408 if nrows == 0 {
409 return Array::from_vec(Ix2::new([0, 0]), vec![]);
410 }
411
412 let data: FerrayResult<Vec<f64>> = cells
413 .iter()
414 .enumerate()
415 .map(|(i, cell)| match cell {
416 None => Ok(filling_value),
417 Some(s) => s.parse::<f64>().map_err(|e| {
418 let row = i / ncols;
419 let col = i % ncols;
420 FerrayError::io_error(format!(
421 "failed to parse value '{s}' at row {row}, col {col}: {e}"
422 ))
423 }),
424 })
425 .collect();
426
427 let data = data?;
428 Array::from_vec(Ix2::new([nrows, ncols]), data)
429}
430
431pub fn fromregex<T>(content: &str, regex: &str) -> FerrayResult<Array<T, Ix2>>
454where
455 T: Element + FromStr,
456 T::Err: Display,
457{
458 let re = regex::Regex::new(regex)
459 .map_err(|e| FerrayError::invalid_value(format!("fromregex: invalid regex: {e}")))?;
460 let n_groups = re.captures_len().saturating_sub(1);
461 if n_groups == 0 {
462 return Err(FerrayError::invalid_value(
463 "fromregex: regex must contain at least one capture group",
464 ));
465 }
466 let mut data: Vec<T> = Vec::new();
467 let mut nrows = 0usize;
468 'lines: for line in content.lines() {
469 if let Some(caps) = re.captures(line) {
470 let start = data.len();
472 for g in 1..=n_groups {
473 let m = caps.get(g).map_or("", |m| m.as_str());
474 match m.parse::<T>() {
475 Ok(v) => data.push(v),
476 Err(_) => {
477 data.truncate(start);
479 continue 'lines;
480 }
481 }
482 }
483 nrows += 1;
484 }
485 }
486 Array::from_vec(Ix2::new([nrows, n_groups]), data)
487}
488
489pub fn fromregex_from_file<T, P>(path: P, regex: &str) -> FerrayResult<Array<T, Ix2>>
497where
498 T: Element + FromStr,
499 T::Err: Display,
500 P: AsRef<Path>,
501{
502 let content = fs::read_to_string(path.as_ref()).map_err(|e| {
503 FerrayError::io_error(format!(
504 "fromregex: failed to read file '{}': {e}",
505 path.as_ref().display()
506 ))
507 })?;
508 fromregex::<T>(&content, regex)
509}
510
#[cfg(test)]
// The tests compare floats parsed from literal text for exact equality.
#[allow(clippy::float_cmp)]
mod tests {
    use super::*;

    /// A 1-D array is written as one `Display`-formatted value per line.
    #[test]
    fn savetxt_1d_writes_one_value_per_line() {
        let values = vec![1.5, 2.5, 3.0, 4.0];
        let arr = Array::<f64, Ix1>::from_vec(Ix1::new([4]), values).unwrap();
        let mut sink: Vec<u8> = Vec::new();
        savetxt_1d_to_writer(&mut sink, &arr, &SaveTxtOptions::default()).unwrap();
        let text = String::from_utf8(sink).unwrap();
        assert_eq!(text, "1.5\n2.5\n3\n4\n");
    }

    /// Saving a vector to disk and loading it back yields the same values.
    #[test]
    fn savetxt_1d_then_loadtxt_1d_roundtrip() {
        let original =
            Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![1.0, -2.5, 3.5, 0.0, 7.25]).unwrap();
        let tmp = tempfile::tempdir().unwrap();
        let file = tmp.path().join("vec.txt");
        savetxt_1d(&file, &original, &SaveTxtOptions::default()).unwrap();
        let restored: Array<f64, Ix1> = loadtxt_1d(&file, ',', 0).unwrap();
        assert_eq!(restored.shape(), &[5]);
        assert_eq!(restored.as_slice().unwrap(), original.as_slice().unwrap());
    }

    /// A rectangular file is flattened row by row.
    #[test]
    fn loadtxt_1d_flattens_multicolumn_input() {
        let tmp = tempfile::tempdir().unwrap();
        let file = tmp.path().join("rect.txt");
        std::fs::write(&file, "1,2\n3,4\n5,6\n").unwrap();
        let flat: Array<i64, Ix1> = loadtxt_1d(&file, ',', 0).unwrap();
        assert_eq!(flat.as_slice().unwrap(), &[1, 2, 3, 4, 5, 6]);
    }

    /// Plain comma-separated floats parse into the expected grid.
    #[test]
    fn loadtxt_simple_csv() {
        let text = "1.0,2.0,3.0\n4.0,5.0,6.0\n";
        let grid: Array<f64, Ix2> = loadtxt_from_str(text, ',', 0).unwrap();
        assert_eq!(grid.shape(), &[2, 3]);
        assert_eq!(grid.as_slice().unwrap(), &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
    }

    /// With `skiprows = 1`, only the two numeric rows of this input remain.
    #[test]
    fn loadtxt_with_skiprows() {
        let text = "# header\nname,value\n1.0,10.0\n2.0,20.0\n";
        let grid: Array<f64, Ix2> = loadtxt_from_str(text, ',', 1).unwrap();
        assert_eq!(grid.shape(), &[2, 2]);
        assert_eq!(grid.as_slice().unwrap()[0], 1.0);
    }

    /// A tab works as the column delimiter.
    #[test]
    fn loadtxt_tab_delimited() {
        let text = "1\t2\t3\n4\t5\t6\n";
        let grid: Array<i32, Ix2> = loadtxt_from_str(text, '\t', 0).unwrap();
        assert_eq!(grid.shape(), &[2, 3]);
        assert_eq!(grid.as_slice().unwrap(), &[1, 2, 3, 4, 5, 6]);
    }

    /// Integer element types are parsed directly.
    #[test]
    fn loadtxt_integers() {
        let text = "10,20\n30,40\n";
        let grid: Array<i64, Ix2> = loadtxt_from_str(text, ',', 0).unwrap();
        assert_eq!(grid.as_slice().unwrap(), &[10i64, 20, 30, 40]);
    }

    /// A 2-D array survives a save/load cycle through a real file.
    #[test]
    fn loadtxt_file_roundtrip() {
        let values = vec![1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0];
        let original = Array::<f64, Ix2>::from_vec(Ix2::new([2, 3]), values.clone()).unwrap();

        let tmp = tempfile::TempDir::new().unwrap();
        let file = tmp.path().join("test.csv");

        savetxt(&file, &original, &SaveTxtOptions::default()).unwrap();
        let restored: Array<f64, Ix2> = loadtxt(&file, ',', 0).unwrap();

        assert_eq!(restored.shape(), &[2, 3]);
        assert_eq!(restored.as_slice().unwrap(), &values[..]);
    }

    /// A custom delimiter replaces the default comma in the output.
    #[test]
    fn savetxt_custom_delimiter() {
        let values = vec![1.0f64, 2.0, 3.0, 4.0];
        let arr = Array::<f64, Ix2>::from_vec(Ix2::new([2, 2]), values).unwrap();

        let tab_opts = SaveTxtOptions {
            delimiter: '\t',
            ..Default::default()
        };
        let mut sink = Vec::new();
        savetxt_to_writer(&mut sink, &arr, &tab_opts).unwrap();

        let text = String::from_utf8(sink).unwrap();
        assert!(text.contains('\t'));
        assert!(!text.contains(','));
    }

    /// Header and footer bracket the data, each on its own line.
    #[test]
    fn savetxt_with_header_footer() {
        let values = vec![1.0f64, 2.0];
        let arr = Array::<f64, Ix2>::from_vec(Ix2::new([1, 2]), values).unwrap();

        let framed = SaveTxtOptions {
            header: Some("# my header".to_string()),
            footer: Some("# end".to_string()),
            ..Default::default()
        };
        let mut sink = Vec::new();
        savetxt_to_writer(&mut sink, &arr, &framed).unwrap();

        let text = String::from_utf8(sink).unwrap();
        assert!(text.starts_with("# my header\n"));
        assert!(text.ends_with("# end\n"));
    }

    /// Empty cells are replaced by the (NaN) filling value.
    #[test]
    fn genfromtxt_missing_nan() {
        let text = "1.0,2.0,3.0\n4.0,,6.0\n7.0,8.0,\n";
        let grid = genfromtxt_from_str(text, ',', f64::NAN, 0, &[]).unwrap();
        assert_eq!(grid.shape(), &[3, 3]);
        let flat = grid.as_slice().unwrap();
        assert_eq!(flat[0], 1.0);
        assert!(flat[4].is_nan());
        assert!(flat[8].is_nan());
    }

    /// Cells equal to a missing-value marker get the filling value.
    #[test]
    fn genfromtxt_na_marker() {
        let text = "1.0,NA,3.0\n4.0,5.0,NA\n";
        let grid = genfromtxt_from_str(text, ',', -999.0, 0, &["NA"]).unwrap();
        assert_eq!(grid.shape(), &[2, 3]);
        let flat = grid.as_slice().unwrap();
        assert_eq!(flat[1], -999.0);
        assert_eq!(flat[5], -999.0);
    }

    /// Skipping one row drops the textual column header.
    #[test]
    fn genfromtxt_with_skiprows() {
        let text = "col1,col2\n1.0,2.0\n3.0,4.0\n";
        let grid = genfromtxt_from_str(text, ',', f64::NAN, 1, &[]).unwrap();
        assert_eq!(grid.shape(), &[2, 2]);
        assert_eq!(grid.as_slice().unwrap()[0], 1.0);
    }

    /// The file-based entry point behaves like the string-based one.
    #[test]
    fn genfromtxt_file() {
        let tmp = tempfile::TempDir::new().unwrap();
        let file = tmp.path().join("genfromtxt_test.csv");
        std::fs::write(&file, "1.0,2.0\n,4.0\n").unwrap();

        let grid = genfromtxt(&file, ',', f64::NAN, 0, &[]).unwrap();
        assert_eq!(grid.shape(), &[2, 2]);
        assert!(grid.as_slice().unwrap()[2].is_nan());
    }

    /// Empty input yields a `[0, 0]` array rather than an error.
    #[test]
    fn loadtxt_empty() {
        let grid: Array<f64, Ix2> = loadtxt_from_str("", ',', 0).unwrap();
        assert_eq!(grid.shape(), &[0, 0]);
    }

    /// One capture group produces one column; non-matching lines are skipped.
    #[test]
    fn fromregex_basic_one_group() {
        let text = "value=10\nvalue=20\nirrelevant\nvalue=30\n";
        let grid: Array<i32, Ix2> = fromregex(text, r"^value=(\d+)$").unwrap();
        assert_eq!(grid.shape(), &[3, 1]);
        assert_eq!(grid.as_slice().unwrap(), &[10, 20, 30]);
    }

    /// Two capture groups produce two columns.
    #[test]
    fn fromregex_multiple_groups() {
        let text = "1,2\n3,4\n5,6\n";
        let grid: Array<f64, Ix2> = fromregex(text, r"^([\d.]+),([\d.]+)$").unwrap();
        assert_eq!(grid.shape(), &[3, 2]);
        assert_eq!(grid.as_slice().unwrap(), &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
    }

    /// A pattern without capture groups is rejected.
    #[test]
    fn fromregex_no_groups_errs() {
        let outcome: FerrayResult<Array<i32, Ix2>> = fromregex("a\nb\n", r"^[ab]$");
        assert!(outcome.is_err());
    }

    /// A pattern that does not compile is rejected.
    #[test]
    fn fromregex_invalid_regex_errs() {
        let outcome: FerrayResult<Array<i32, Ix2>> = fromregex("", r"(unclosed");
        assert!(outcome.is_err());
    }

    /// Matching lines whose captures fail to parse are dropped silently.
    #[test]
    fn fromregex_skips_unparseable_rows() {
        let text = "v=10\nv=foo\nv=20\n";
        let grid: Array<i32, Ix2> = fromregex(text, r"^v=(\S+)$").unwrap();
        assert_eq!(grid.shape(), &[2, 1]);
        assert_eq!(grid.as_slice().unwrap(), &[10, 20]);
    }

    /// The file-based entry point reads the file and applies the pattern.
    #[test]
    fn fromregex_from_file_roundtrip() {
        let tmp = tempfile::TempDir::new().unwrap();
        let file = tmp.path().join("regex_test.txt");
        std::fs::write(&file, "x=1\nx=2\nx=3\n").unwrap();
        let grid: Array<i32, Ix2> = fromregex_from_file(&file, r"^x=(\d+)$").unwrap();
        assert_eq!(grid.shape(), &[3, 1]);
        assert_eq!(grid.as_slice().unwrap(), &[1, 2, 3]);
    }
}