1pub mod parser;
8
9use std::fmt::Display;
10use std::fs;
11use std::io::Write;
12use std::path::Path;
13use std::str::FromStr;
14
15use ferray_core::Array;
16use ferray_core::dimension::Ix2;
17use ferray_core::dtype::Element;
18use ferray_core::error::{FerrayError, FerrayResult};
19
20use self::parser::{TextParseOptions, parse_text_grid, parse_text_grid_with_missing};
21
/// Formatting options consumed by [`savetxt`] and [`savetxt_to_writer`].
#[derive(Debug, Clone)]
pub struct SaveTxtOptions {
    /// Character written between adjacent values on the same row.
    pub delimiter: char,
    /// Optional per-value format string; when `None`, each value is written
    /// using its `Display` output.
    pub fmt: Option<String>,
    /// Optional text written (followed by `newline`) before the first row.
    pub header: Option<String>,
    /// Optional text written (followed by `newline`) after the last row.
    pub footer: Option<String>,
    /// Line terminator emitted after the header, each row, and the footer.
    pub newline: String,
}

impl Default for SaveTxtOptions {
    /// Comma-delimited output with `"\n"` line endings and no format string,
    /// header, or footer.
    fn default() -> Self {
        let newline = String::from("\n");
        Self {
            newline,
            delimiter: ',',
            footer: None,
            header: None,
            fmt: None,
        }
    }
}
49
/// Formats one value according to a small, best-effort subset of format syntax.
///
/// Recognised forms:
/// * Brace style: `{:.N}`, `{:.Ne}` and `{:.NE}` when they make up the entire
///   format string. Any other string containing `{` has every `{}` replaced
///   by the value's `Display` text.
/// * printf style: `%e`, `%E`, `%f` and `%g`, each optionally with a `.N`
///   precision. Characters between `%` and `.` are ignored, `%f` and `%g`
///   share the same fixed-notation rendering, and a spec that has extra
///   characters but no `.` falls back to plain `Display`.
///
/// Precision-based forms re-parse the value's `Display` text as `f64`. When
/// that parse fails, or the spec is not recognised, the plain `Display` text
/// is used instead (for brace strings, through `{}` substitution).
fn format_value<T: Display>(val: &T, fmt_str: &str) -> String {
    let plain = val.to_string();

    if fmt_str.contains('{') {
        let rendered = fmt_str
            .strip_prefix("{:")
            .and_then(|s| s.strip_suffix('}'))
            .and_then(|s| s.strip_prefix('.'))
            .and_then(|spec| {
                // Keep the case of the exponent marker so `{:.3E}` yields an
                // uppercase `E`, matching what the `%.3E` form produces.
                let (digits, mode) = if let Some(d) = spec.strip_suffix('e') {
                    (d, 'e')
                } else if let Some(d) = spec.strip_suffix('E') {
                    (d, 'E')
                } else {
                    (spec, 'f')
                };
                let prec = digits.parse::<usize>().ok()?;
                let v = plain.parse::<f64>().ok()?;
                Some(render_float(v, Some(prec), mode))
            });
        return rendered.unwrap_or_else(|| fmt_str.replace("{}", &plain));
    }

    if let Some(spec) = fmt_str.strip_prefix('%') {
        // The conversion character is the last character of the spec.
        let split = ['e', 'E', 'f', 'g']
            .iter()
            .find_map(|&m| spec.strip_suffix(m).map(|body| (body, m)));
        if let Some((body, mode)) = split {
            if let Ok(v) = plain.parse::<f64>() {
                match body.find('.') {
                    Some(dot) => {
                        if let Ok(prec) = body[dot + 1..].parse::<usize>() {
                            return render_float(v, Some(prec), mode);
                        }
                    }
                    None if body.is_empty() => return render_float(v, None, mode),
                    // Width/flags without a precision: not supported.
                    None => {}
                }
            }
        }
    }

    plain
}

/// Renders `v` in exponent (`'e'` / `'E'`) or fixed (any other `mode`)
/// notation, with an optional number of fractional digits.
fn render_float(v: f64, prec: Option<usize>, mode: char) -> String {
    match (mode, prec) {
        ('e', Some(p)) => format!("{v:.p$e}"),
        ('E', Some(p)) => format!("{v:.p$E}"),
        (_, Some(p)) => format!("{v:.p$}"),
        ('e', None) => format!("{v:e}"),
        ('E', None) => format!("{v:E}"),
        (_, None) => format!("{v}"),
    }
}
124
125pub fn savetxt<T: Element + Display, P: AsRef<Path>>(
136 path: P,
137 array: &Array<T, Ix2>,
138 opts: &SaveTxtOptions,
139) -> FerrayResult<()> {
140 let mut file = std::fs::File::create(path.as_ref()).map_err(|e| {
141 FerrayError::io_error(format!(
142 "failed to create file '{}': {e}",
143 path.as_ref().display()
144 ))
145 })?;
146
147 savetxt_to_writer(&mut file, array, opts)
148}
149
150pub fn savetxt_to_writer<T: Element + Display, W: Write>(
152 writer: &mut W,
153 array: &Array<T, Ix2>,
154 opts: &SaveTxtOptions,
155) -> FerrayResult<()> {
156 let shape = array.shape();
157 let nrows = shape[0];
158 let ncols = shape[1];
159
160 if let Some(ref header) = opts.header {
161 write!(writer, "{header}").map_err(|e| FerrayError::io_error(e.to_string()))?;
162 writer
163 .write_all(opts.newline.as_bytes())
164 .map_err(|e| FerrayError::io_error(e.to_string()))?;
165 }
166
167 let slice = array
168 .as_slice()
169 .ok_or_else(|| FerrayError::io_error("cannot save non-contiguous array as text"))?;
170
171 for row in 0..nrows {
172 for col in 0..ncols {
173 if col > 0 {
174 write!(writer, "{}", opts.delimiter)
175 .map_err(|e| FerrayError::io_error(e.to_string()))?;
176 }
177 let val = &slice[row * ncols + col];
178 if let Some(ref fmt_str) = opts.fmt {
179 let formatted = format_value(val, fmt_str);
185 write!(writer, "{formatted}").map_err(|e| FerrayError::io_error(e.to_string()))?;
186 } else {
187 write!(writer, "{val}").map_err(|e| FerrayError::io_error(e.to_string()))?;
188 }
189 }
190 writer
191 .write_all(opts.newline.as_bytes())
192 .map_err(|e| FerrayError::io_error(e.to_string()))?;
193 }
194
195 if let Some(ref footer) = opts.footer {
196 write!(writer, "{footer}").map_err(|e| FerrayError::io_error(e.to_string()))?;
197 writer
198 .write_all(opts.newline.as_bytes())
199 .map_err(|e| FerrayError::io_error(e.to_string()))?;
200 }
201
202 writer
203 .flush()
204 .map_err(|e| FerrayError::io_error(e.to_string()))?;
205 Ok(())
206}
207
208pub fn loadtxt<T, P>(path: P, delimiter: char, skiprows: usize) -> FerrayResult<Array<T, Ix2>>
219where
220 T: Element + FromStr,
221 T::Err: Display,
222 P: AsRef<Path>,
223{
224 let content = fs::read_to_string(path.as_ref()).map_err(|e| {
225 FerrayError::io_error(format!(
226 "failed to read file '{}': {e}",
227 path.as_ref().display()
228 ))
229 })?;
230
231 loadtxt_from_str(&content, delimiter, skiprows)
232}
233
234pub fn loadtxt_from_str<T>(
236 content: &str,
237 delimiter: char,
238 skiprows: usize,
239) -> FerrayResult<Array<T, Ix2>>
240where
241 T: Element + FromStr,
242 T::Err: Display,
243{
244 let opts = TextParseOptions {
245 delimiter,
246 skiprows,
247 ..Default::default()
248 };
249
250 let (cells, nrows, ncols) = parse_text_grid(content, &opts)?;
251
252 if nrows == 0 {
253 return Array::from_vec(Ix2::new([0, 0]), vec![]);
254 }
255
256 let data: FerrayResult<Vec<T>> = cells
257 .iter()
258 .enumerate()
259 .map(|(i, cell)| {
260 cell.parse::<T>().map_err(|e| {
261 let row = i / ncols;
262 let col = i % ncols;
263 FerrayError::io_error(format!(
264 "failed to parse value '{cell}' at row {row}, col {col}: {e}"
265 ))
266 })
267 })
268 .collect();
269
270 let data = data?;
271 Array::from_vec(Ix2::new([nrows, ncols]), data)
272}
273
274pub fn genfromtxt<P: AsRef<Path>>(
285 path: P,
286 delimiter: char,
287 filling_value: f64,
288 skiprows: usize,
289 missing_values: &[&str],
290) -> FerrayResult<Array<f64, Ix2>> {
291 let content = fs::read_to_string(path.as_ref()).map_err(|e| {
292 FerrayError::io_error(format!(
293 "failed to read file '{}': {e}",
294 path.as_ref().display()
295 ))
296 })?;
297
298 genfromtxt_from_str(&content, delimiter, filling_value, skiprows, missing_values)
299}
300
301pub fn genfromtxt_from_str(
303 content: &str,
304 delimiter: char,
305 filling_value: f64,
306 skiprows: usize,
307 missing_values: &[&str],
308) -> FerrayResult<Array<f64, Ix2>> {
309 let opts = TextParseOptions {
310 delimiter,
311 skiprows,
312 ..Default::default()
313 };
314
315 let mut all_missing: Vec<&str> = vec!["", "NA", "N/A", "nan", "NaN", "NAN", "--", "null"];
317 for mv in missing_values {
318 if !all_missing.contains(mv) {
319 all_missing.push(mv);
320 }
321 }
322
323 let (cells, nrows, ncols) = parse_text_grid_with_missing(content, &opts, &all_missing)?;
324
325 if nrows == 0 {
326 return Array::from_vec(Ix2::new([0, 0]), vec![]);
327 }
328
329 let data: FerrayResult<Vec<f64>> = cells
330 .iter()
331 .enumerate()
332 .map(|(i, cell)| match cell {
333 None => Ok(filling_value),
334 Some(s) => s.parse::<f64>().map_err(|e| {
335 let row = i / ncols;
336 let col = i % ncols;
337 FerrayError::io_error(format!(
338 "failed to parse value '{s}' at row {row}, col {col}: {e}"
339 ))
340 }),
341 })
342 .collect();
343
344 let data = data?;
345 Array::from_vec(Ix2::new([nrows, ncols]), data)
346}
347
348pub fn fromregex<T>(content: &str, regex: &str) -> FerrayResult<Array<T, Ix2>>
371where
372 T: Element + FromStr,
373 T::Err: Display,
374{
375 let re = regex::Regex::new(regex)
376 .map_err(|e| FerrayError::invalid_value(format!("fromregex: invalid regex: {e}")))?;
377 let n_groups = re.captures_len().saturating_sub(1);
378 if n_groups == 0 {
379 return Err(FerrayError::invalid_value(
380 "fromregex: regex must contain at least one capture group",
381 ));
382 }
383 let mut data: Vec<T> = Vec::new();
384 let mut nrows = 0usize;
385 'lines: for line in content.lines() {
386 if let Some(caps) = re.captures(line) {
387 let start = data.len();
389 for g in 1..=n_groups {
390 let m = caps.get(g).map_or("", |m| m.as_str());
391 match m.parse::<T>() {
392 Ok(v) => data.push(v),
393 Err(_) => {
394 data.truncate(start);
396 continue 'lines;
397 }
398 }
399 }
400 nrows += 1;
401 }
402 }
403 Array::from_vec(Ix2::new([nrows, n_groups]), data)
404}
405
406pub fn fromregex_from_file<T, P>(path: P, regex: &str) -> FerrayResult<Array<T, Ix2>>
414where
415 T: Element + FromStr,
416 T::Err: Display,
417 P: AsRef<Path>,
418{
419 let content = fs::read_to_string(path.as_ref()).map_err(|e| {
420 FerrayError::io_error(format!(
421 "fromregex: failed to read file '{}': {e}",
422 path.as_ref().display()
423 ))
424 })?;
425 fromregex::<T>(&content, regex)
426}
427
#[cfg(test)]
// Exact float comparisons are intended: every value originates from a literal.
#[allow(clippy::float_cmp)]
mod tests {
    use super::*;

    /// Builds a row-major `f64` matrix, panicking if shape and data disagree.
    fn matrix(rows: usize, cols: usize, values: Vec<f64>) -> Array<f64, Ix2> {
        Array::<f64, Ix2>::from_vec(Ix2::new([rows, cols]), values).unwrap()
    }

    /// Runs `savetxt_to_writer` against an in-memory buffer and returns the text.
    fn render(arr: &Array<f64, Ix2>, opts: &SaveTxtOptions) -> String {
        let mut sink = Vec::new();
        savetxt_to_writer(&mut sink, arr, opts).unwrap();
        String::from_utf8(sink).unwrap()
    }

    /// Returns `file_name` inside `dir_name` under the system temp directory,
    /// creating the directory on a best-effort basis.
    fn scratch_path(dir_name: String, file_name: &str) -> std::path::PathBuf {
        let dir = std::env::temp_dir().join(dir_name);
        let _ = std::fs::create_dir_all(&dir);
        dir.join(file_name)
    }

    #[test]
    fn loadtxt_simple_csv() {
        let parsed = loadtxt_from_str::<f64>("1.0,2.0,3.0\n4.0,5.0,6.0\n", ',', 0).unwrap();
        assert_eq!(parsed.shape(), &[2, 3]);
        assert_eq!(parsed.as_slice().unwrap(), &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
    }

    #[test]
    fn loadtxt_with_skiprows() {
        let text = "# header\nname,value\n1.0,10.0\n2.0,20.0\n";
        let parsed = loadtxt_from_str::<f64>(text, ',', 1).unwrap();
        assert_eq!(parsed.shape(), &[2, 2]);
        assert_eq!(parsed.as_slice().unwrap()[0], 1.0);
    }

    #[test]
    fn loadtxt_tab_delimited() {
        let parsed = loadtxt_from_str::<i32>("1\t2\t3\n4\t5\t6\n", '\t', 0).unwrap();
        assert_eq!(parsed.shape(), &[2, 3]);
        assert_eq!(parsed.as_slice().unwrap(), &[1, 2, 3, 4, 5, 6]);
    }

    #[test]
    fn loadtxt_integers() {
        let parsed = loadtxt_from_str::<i64>("10,20\n30,40\n", ',', 0).unwrap();
        assert_eq!(parsed.as_slice().unwrap(), &[10i64, 20, 30, 40]);
    }

    #[test]
    fn loadtxt_file_roundtrip() {
        let expected = vec![1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0];
        let source = matrix(2, 3, expected.clone());
        let file = scratch_path(
            format!("ferray_io_text_{}", std::process::id()),
            "test.csv",
        );

        savetxt(&file, &source, &SaveTxtOptions::default()).unwrap();
        let reloaded: Array<f64, Ix2> = loadtxt(&file, ',', 0).unwrap();

        assert_eq!(reloaded.shape(), &[2, 3]);
        assert_eq!(reloaded.as_slice().unwrap(), &expected[..]);
        let _ = std::fs::remove_file(&file);
    }

    #[test]
    fn savetxt_custom_delimiter() {
        let source = matrix(2, 2, vec![1.0f64, 2.0, 3.0, 4.0]);
        let tabbed = SaveTxtOptions {
            delimiter: '\t',
            ..Default::default()
        };

        let text = render(&source, &tabbed);
        assert!(text.contains('\t'));
        assert!(!text.contains(','));
    }

    #[test]
    fn savetxt_with_header_footer() {
        let source = matrix(1, 2, vec![1.0f64, 2.0]);
        let framed = SaveTxtOptions {
            header: Some("# my header".to_string()),
            footer: Some("# end".to_string()),
            ..Default::default()
        };

        let text = render(&source, &framed);
        assert!(text.starts_with("# my header\n"));
        assert!(text.ends_with("# end\n"));
    }

    #[test]
    fn genfromtxt_missing_nan() {
        let text = "1.0,2.0,3.0\n4.0,,6.0\n7.0,8.0,\n";
        let parsed = genfromtxt_from_str(text, ',', f64::NAN, 0, &[]).unwrap();
        assert_eq!(parsed.shape(), &[3, 3]);

        let flat = parsed.as_slice().unwrap();
        assert_eq!(flat[0], 1.0);
        assert!(flat[4].is_nan());
        assert!(flat[8].is_nan());
    }

    #[test]
    fn genfromtxt_na_marker() {
        let text = "1.0,NA,3.0\n4.0,5.0,NA\n";
        let parsed = genfromtxt_from_str(text, ',', -999.0, 0, &["NA"]).unwrap();
        assert_eq!(parsed.shape(), &[2, 3]);

        let flat = parsed.as_slice().unwrap();
        assert_eq!(flat[1], -999.0);
        assert_eq!(flat[5], -999.0);
    }

    #[test]
    fn genfromtxt_with_skiprows() {
        let text = "col1,col2\n1.0,2.0\n3.0,4.0\n";
        let parsed = genfromtxt_from_str(text, ',', f64::NAN, 1, &[]).unwrap();
        assert_eq!(parsed.shape(), &[2, 2]);
        assert_eq!(parsed.as_slice().unwrap()[0], 1.0);
    }

    #[test]
    fn genfromtxt_file() {
        let file = scratch_path(
            format!("ferray_io_text_{}", std::process::id()),
            "genfromtxt_test.csv",
        );
        std::fs::write(&file, "1.0,2.0\n,4.0\n").unwrap();

        let parsed = genfromtxt(&file, ',', f64::NAN, 0, &[]).unwrap();
        assert_eq!(parsed.shape(), &[2, 2]);
        assert!(parsed.as_slice().unwrap()[2].is_nan());
        let _ = std::fs::remove_file(&file);
    }

    #[test]
    fn loadtxt_empty() {
        let parsed = loadtxt_from_str::<f64>("", ',', 0).unwrap();
        assert_eq!(parsed.shape(), &[0, 0]);
    }

    #[test]
    fn fromregex_basic_one_group() {
        let text = "value=10\nvalue=20\nirrelevant\nvalue=30\n";
        let parsed = fromregex::<i32>(text, r"^value=(\d+)$").unwrap();
        assert_eq!(parsed.shape(), &[3, 1]);
        assert_eq!(parsed.as_slice().unwrap(), &[10, 20, 30]);
    }

    #[test]
    fn fromregex_multiple_groups() {
        let parsed = fromregex::<f64>("1,2\n3,4\n5,6\n", r"^([\d.]+),([\d.]+)$").unwrap();
        assert_eq!(parsed.shape(), &[3, 2]);
        assert_eq!(parsed.as_slice().unwrap(), &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
    }

    #[test]
    fn fromregex_no_groups_errs() {
        assert!(fromregex::<i32>("a\nb\n", r"^[ab]$").is_err());
    }

    #[test]
    fn fromregex_invalid_regex_errs() {
        assert!(fromregex::<i32>("", r"(unclosed").is_err());
    }

    #[test]
    fn fromregex_skips_unparseable_rows() {
        let parsed = fromregex::<i32>("v=10\nv=foo\nv=20\n", r"^v=(\S+)$").unwrap();
        assert_eq!(parsed.shape(), &[2, 1]);
        assert_eq!(parsed.as_slice().unwrap(), &[10, 20]);
    }

    #[test]
    fn fromregex_from_file_roundtrip() {
        let file = scratch_path(
            format!("ferray_io_fromregex_{}", std::process::id()),
            "regex_test.txt",
        );
        std::fs::write(&file, "x=1\nx=2\nx=3\n").unwrap();

        let parsed: Array<i32, Ix2> = fromregex_from_file(&file, r"^x=(\d+)$").unwrap();
        assert_eq!(parsed.shape(), &[3, 1]);
        assert_eq!(parsed.as_slice().unwrap(), &[1, 2, 3]);
        let _ = std::fs::remove_file(&file);
    }
}